From 1609660791cbf83e7ffa3e5e1877f08c0ae986d7 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Mon, 3 Feb 2025 15:04:27 +0100 Subject: [PATCH 01/10] feat: Add ContentLayer attribute to designate items to body or furniture Signed-off-by: Christoph Auer --- docling_core/types/doc/document.py | 70 +++++++++++++++++-- .../data/doc/constructed_doc.embedded.json.gt | 23 ++++++ .../data/doc/constructed_doc.embedded.yaml.gt | 23 ++++++ .../doc/constructed_doc.referenced.json.gt | 23 ++++++ .../doc/constructed_doc.referenced.yaml.gt | 23 ++++++ test/data/docling_document/unit/CodeItem.yaml | 1 + .../docling_document/unit/FloatingItem.yaml | 1 + .../docling_document/unit/KeyValueItem.yaml | 1 + test/data/docling_document/unit/ListItem.yaml | 3 +- .../docling_document/unit/PictureItem.yaml | 1 + .../unit/SectionHeaderItem.yaml | 1 + .../data/docling_document/unit/TableItem.yaml | 1 + test/data/docling_document/unit/TextItem.yaml | 1 + 13 files changed, 166 insertions(+), 6 deletions(-) diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index b0e1007f..165ac707 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -12,6 +12,7 @@ import textwrap import typing import warnings +from enum import Enum from io import BytesIO from pathlib import Path from typing import Any, Dict, Final, List, Literal, Optional, Tuple, Union @@ -505,6 +506,13 @@ class ProvenanceItem(BaseModel): charspan: Tuple[int, int] +class ContentLayer(str, Enum): + """ContentLayer.""" + + BODY = "body" + FURNITURE = "furniture" + + class NodeItem(BaseModel): """NodeItem.""" @@ -512,6 +520,8 @@ class NodeItem(BaseModel): parent: Optional[RefItem] = None children: List[RefItem] = [] + content_layer: ContentLayer = ContentLayer.BODY + model_config = ConfigDict(extra="forbid") def get_ref(self): @@ -1419,7 +1429,7 @@ class DoclingDocument(BaseModel): # generated from synthetic data. ) - furniture: GroupItem = GroupItem( + furniture: Annotated[GroupItem, Field(deprecated=True)] = GroupItem( name="_root_", self_ref="#/furniture" ) # List[RefItem] = [] body: GroupItem = GroupItem(name="_root_", self_ref="#/body") # List[RefItem] = [] @@ -1437,6 +1447,7 @@ def add_group( label: Optional[GroupLabel] = None, name: Optional[str] = None, parent: Optional[NodeItem] = None, + content_layer: Optional[ContentLayer] = None, ) -> GroupItem: """add_group. @@ -1456,6 +1467,8 @@ def add_group( group.name = name if label is not None: group.label = label + if content_layer: + group.content_layer = content_layer self.groups.append(group) parent.children.append(RefItem(cref=cref)) @@ -1470,6 +1483,7 @@ def add_list_item( orig: Optional[str] = None, prov: Optional[ProvenanceItem] = None, parent: Optional[NodeItem] = None, + content_layer: Optional[ContentLayer] = None, ): """add_list_item. @@ -1500,6 +1514,8 @@ def add_list_item( ) if prov: list_item.prov.append(prov) + if content_layer: + list_item.content_layer = content_layer self.texts.append(list_item) parent.children.append(RefItem(cref=cref)) @@ -1513,6 +1529,7 @@ def add_text( orig: Optional[str] = None, prov: Optional[ProvenanceItem] = None, parent: Optional[NodeItem] = None, + content_layer: Optional[ContentLayer] = None, ): """add_text. @@ -1526,16 +1543,40 @@ def add_text( # Catch a few cases that are in principle allowed # but that will create confusion down the road if label in [DocItemLabel.TITLE]: - return self.add_title(text=text, orig=orig, prov=prov, parent=parent) + return self.add_title( + text=text, + orig=orig, + prov=prov, + parent=parent, + content_layer=content_layer, + ) elif label in [DocItemLabel.LIST_ITEM]: - return self.add_list_item(text=text, orig=orig, prov=prov, parent=parent) + return self.add_list_item( + text=text, + orig=orig, + prov=prov, + parent=parent, + content_layer=content_layer, + ) elif label in [DocItemLabel.SECTION_HEADER]: - return self.add_heading(text=text, orig=orig, prov=prov, parent=parent) + return self.add_heading( + text=text, + orig=orig, + prov=prov, + parent=parent, + content_layer=content_layer, + ) elif label in [DocItemLabel.CODE]: - return self.add_code(text=text, orig=orig, prov=prov, parent=parent) + return self.add_code( + text=text, + orig=orig, + prov=prov, + parent=parent, + content_layer=content_layer, + ) else: @@ -1557,6 +1598,9 @@ def add_text( if prov: text_item.prov.append(prov) + if content_layer: + text_item.content_layer = content_layer + self.texts.append(text_item) parent.children.append(RefItem(cref=cref)) @@ -1569,6 +1613,7 @@ def add_table( prov: Optional[ProvenanceItem] = None, parent: Optional[NodeItem] = None, label: DocItemLabel = DocItemLabel.TABLE, + content_layer: Optional[ContentLayer] = None, ): """add_table. @@ -1590,6 +1635,9 @@ def add_table( ) if prov: tbl_item.prov.append(prov) + if content_layer: + tbl_item.content_layer = content_layer + if caption: tbl_item.captions.append(caption.get_ref()) @@ -1605,6 +1653,7 @@ def add_picture( caption: Optional[Union[TextItem, RefItem]] = None, prov: Optional[ProvenanceItem] = None, parent: Optional[NodeItem] = None, + content_layer: Optional[ContentLayer] = None, ): """add_picture. @@ -1629,6 +1678,8 @@ def add_picture( ) if prov: fig_item.prov.append(prov) + if content_layer: + fig_item.content_layer = content_layer if caption: fig_item.captions.append(caption.get_ref()) @@ -1643,6 +1694,7 @@ def add_title( orig: Optional[str] = None, prov: Optional[ProvenanceItem] = None, parent: Optional[NodeItem] = None, + content_layer: Optional[ContentLayer] = None, ): """add_title. @@ -1668,6 +1720,8 @@ def add_title( ) if prov: text_item.prov.append(prov) + if content_layer: + text_item.content_layer = content_layer self.texts.append(text_item) parent.children.append(RefItem(cref=cref)) @@ -1681,6 +1735,7 @@ def add_code( orig: Optional[str] = None, prov: Optional[ProvenanceItem] = None, parent: Optional[NodeItem] = None, + content_layer: Optional[ContentLayer] = None, ): """add_code. @@ -1706,6 +1761,8 @@ def add_code( ) if code_language: code_item.code_language = code_language + if content_layer: + code_item.content_layer = content_layer if prov: code_item.prov.append(prov) @@ -1721,6 +1778,7 @@ def add_heading( level: LevelNumber = 1, prov: Optional[ProvenanceItem] = None, parent: Optional[NodeItem] = None, + content_layer: Optional[ContentLayer] = None, ): """add_heading. @@ -1748,6 +1806,8 @@ def add_heading( ) if prov: section_header_item.prov.append(prov) + if content_layer: + section_header_item.content_layer = content_layer self.texts.append(section_header_item) parent.children.append(RefItem(cref=cref)) diff --git a/test/data/doc/constructed_doc.embedded.json.gt b/test/data/doc/constructed_doc.embedded.json.gt index 368a11e9..b6602d32 100644 --- a/test/data/doc/constructed_doc.embedded.json.gt +++ b/test/data/doc/constructed_doc.embedded.json.gt @@ -5,6 +5,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -36,6 +37,7 @@ "$ref": "#/pictures/1" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -56,6 +58,7 @@ "$ref": "#/groups/1" } ], + "content_layer": "body", "name": "Introduction", "label": "chapter" }, @@ -81,6 +84,7 @@ "$ref": "#/texts/11" } ], + "content_layer": "body", "name": "group", "label": "list" }, @@ -100,6 +104,7 @@ "$ref": "#/texts/10" } ], + "content_layer": "body", "name": "group", "label": "ordered_list" } @@ -118,6 +123,7 @@ "$ref": "#/texts/2" } ], + "content_layer": "body", "label": "title", "prov": [], "orig": "Title of the Document", @@ -129,6 +135,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Author 1\nAffiliation 1", @@ -140,6 +147,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Author 2\nAffiliation 2", @@ -151,6 +159,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "1. Introduction", @@ -163,6 +172,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "This paper introduces the biggest invention ever made. ...", @@ -174,6 +184,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "list item 1", @@ -187,6 +198,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "list item 2", @@ -200,6 +212,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "list item 3", @@ -213,6 +226,7 @@ "$ref": "#/groups/2" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "list item 3.a", @@ -226,6 +240,7 @@ "$ref": "#/groups/2" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "list item 3.b", @@ -239,6 +254,7 @@ "$ref": "#/groups/2" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "list item 3.c", @@ -252,6 +268,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "list item 4", @@ -265,6 +282,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "This is the caption of table 1.", @@ -276,6 +294,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "This is the caption of figure 1.", @@ -287,6 +306,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "This is the caption of figure 2.", @@ -300,6 +320,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -317,6 +338,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -345,6 +367,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [ diff --git a/test/data/doc/constructed_doc.embedded.yaml.gt b/test/data/doc/constructed_doc.embedded.yaml.gt index 0184d8b4..009768fb 100644 --- a/test/data/doc/constructed_doc.embedded.yaml.gt +++ b/test/data/doc/constructed_doc.embedded.yaml.gt @@ -8,11 +8,13 @@ body: - $ref: '#/pictures/0' - $ref: '#/texts/14' - $ref: '#/pictures/1' + content_layer: body label: unspecified name: _root_ self_ref: '#/body' furniture: children: [] + content_layer: body label: unspecified name: _root_ self_ref: '#/furniture' @@ -21,6 +23,7 @@ groups: - $ref: '#/texts/3' - $ref: '#/texts/4' - $ref: '#/groups/1' + content_layer: body label: chapter name: Introduction parent: @@ -32,6 +35,7 @@ groups: - $ref: '#/texts/7' - $ref: '#/groups/2' - $ref: '#/texts/11' + content_layer: body label: list name: group parent: @@ -41,6 +45,7 @@ groups: - $ref: '#/texts/8' - $ref: '#/texts/9' - $ref: '#/texts/10' + content_layer: body label: ordered_list name: group parent: @@ -54,6 +59,7 @@ pictures: captions: - $ref: '#/texts/13' children: [] + content_layer: body footnotes: [] label: picture parent: @@ -65,6 +71,7 @@ pictures: captions: - $ref: '#/texts/14' children: [] + content_layer: body footnotes: [] image: dpi: 72 @@ -84,6 +91,7 @@ tables: - captions: - $ref: '#/texts/12' children: [] + content_layer: body data: grid: - - col_span: 1 @@ -260,6 +268,7 @@ texts: - children: - $ref: '#/texts/1' - $ref: '#/texts/2' + content_layer: body label: title orig: Title of the Document parent: @@ -268,6 +277,7 @@ texts: self_ref: '#/texts/0' text: Title of the Document - children: [] + content_layer: body label: text orig: 'Author 1 @@ -280,6 +290,7 @@ texts: Affiliation 1' - children: [] + content_layer: body label: text orig: 'Author 2 @@ -292,6 +303,7 @@ texts: Affiliation 2' - children: [] + content_layer: body label: section_header level: 1 orig: 1. Introduction @@ -301,6 +313,7 @@ texts: self_ref: '#/texts/3' text: 1. Introduction - children: [] + content_layer: body label: text orig: This paper introduces the biggest invention ever made. ... parent: @@ -309,6 +322,7 @@ texts: self_ref: '#/texts/4' text: This paper introduces the biggest invention ever made. ... - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -319,6 +333,7 @@ texts: self_ref: '#/texts/5' text: list item 1 - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -329,6 +344,7 @@ texts: self_ref: '#/texts/6' text: list item 2 - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -339,6 +355,7 @@ texts: self_ref: '#/texts/7' text: list item 3 - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -349,6 +366,7 @@ texts: self_ref: '#/texts/8' text: list item 3.a - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -359,6 +377,7 @@ texts: self_ref: '#/texts/9' text: list item 3.b - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -369,6 +388,7 @@ texts: self_ref: '#/texts/10' text: list item 3.c - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -379,6 +399,7 @@ texts: self_ref: '#/texts/11' text: list item 4 - children: [] + content_layer: body label: caption orig: This is the caption of table 1. parent: @@ -387,6 +408,7 @@ texts: self_ref: '#/texts/12' text: This is the caption of table 1. - children: [] + content_layer: body label: caption orig: This is the caption of figure 1. parent: @@ -395,6 +417,7 @@ texts: self_ref: '#/texts/13' text: This is the caption of figure 1. - children: [] + content_layer: body label: caption orig: This is the caption of figure 2. parent: diff --git a/test/data/doc/constructed_doc.referenced.json.gt b/test/data/doc/constructed_doc.referenced.json.gt index ea334402..77c1fb0a 100644 --- a/test/data/doc/constructed_doc.referenced.json.gt +++ b/test/data/doc/constructed_doc.referenced.json.gt @@ -5,6 +5,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -36,6 +37,7 @@ "$ref": "#/pictures/1" } ], + "content_layer": "body", "name": "_root_", "label": "unspecified" }, @@ -56,6 +58,7 @@ "$ref": "#/groups/1" } ], + "content_layer": "body", "name": "Introduction", "label": "chapter" }, @@ -81,6 +84,7 @@ "$ref": "#/texts/11" } ], + "content_layer": "body", "name": "group", "label": "list" }, @@ -100,6 +104,7 @@ "$ref": "#/texts/10" } ], + "content_layer": "body", "name": "group", "label": "ordered_list" } @@ -118,6 +123,7 @@ "$ref": "#/texts/2" } ], + "content_layer": "body", "label": "title", "prov": [], "orig": "Title of the Document", @@ -129,6 +135,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Author 1\nAffiliation 1", @@ -140,6 +147,7 @@ "$ref": "#/texts/0" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "Author 2\nAffiliation 2", @@ -151,6 +159,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "section_header", "prov": [], "orig": "1. Introduction", @@ -163,6 +172,7 @@ "$ref": "#/groups/0" }, "children": [], + "content_layer": "body", "label": "text", "prov": [], "orig": "This paper introduces the biggest invention ever made. ...", @@ -174,6 +184,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "list item 1", @@ -187,6 +198,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "list item 2", @@ -200,6 +212,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "list item 3", @@ -213,6 +226,7 @@ "$ref": "#/groups/2" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "list item 3.a", @@ -226,6 +240,7 @@ "$ref": "#/groups/2" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "list item 3.b", @@ -239,6 +254,7 @@ "$ref": "#/groups/2" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "list item 3.c", @@ -252,6 +268,7 @@ "$ref": "#/groups/1" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [], "orig": "list item 4", @@ -265,6 +282,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "This is the caption of table 1.", @@ -276,6 +294,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "This is the caption of figure 1.", @@ -287,6 +306,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [], "orig": "This is the caption of figure 2.", @@ -300,6 +320,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -317,6 +338,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "picture", "prov": [], "captions": [ @@ -345,6 +367,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "table", "prov": [], "captions": [ diff --git a/test/data/doc/constructed_doc.referenced.yaml.gt b/test/data/doc/constructed_doc.referenced.yaml.gt index 98bdbf47..55cf4c2d 100644 --- a/test/data/doc/constructed_doc.referenced.yaml.gt +++ b/test/data/doc/constructed_doc.referenced.yaml.gt @@ -8,11 +8,13 @@ body: - $ref: '#/pictures/0' - $ref: '#/texts/14' - $ref: '#/pictures/1' + content_layer: body label: unspecified name: _root_ self_ref: '#/body' furniture: children: [] + content_layer: body label: unspecified name: _root_ self_ref: '#/furniture' @@ -21,6 +23,7 @@ groups: - $ref: '#/texts/3' - $ref: '#/texts/4' - $ref: '#/groups/1' + content_layer: body label: chapter name: Introduction parent: @@ -32,6 +35,7 @@ groups: - $ref: '#/texts/7' - $ref: '#/groups/2' - $ref: '#/texts/11' + content_layer: body label: list name: group parent: @@ -41,6 +45,7 @@ groups: - $ref: '#/texts/8' - $ref: '#/texts/9' - $ref: '#/texts/10' + content_layer: body label: ordered_list name: group parent: @@ -54,6 +59,7 @@ pictures: captions: - $ref: '#/texts/13' children: [] + content_layer: body footnotes: [] label: picture parent: @@ -65,6 +71,7 @@ pictures: captions: - $ref: '#/texts/14' children: [] + content_layer: body footnotes: [] image: dpi: 72 @@ -84,6 +91,7 @@ tables: - captions: - $ref: '#/texts/12' children: [] + content_layer: body data: grid: - - col_span: 1 @@ -260,6 +268,7 @@ texts: - children: - $ref: '#/texts/1' - $ref: '#/texts/2' + content_layer: body label: title orig: Title of the Document parent: @@ -268,6 +277,7 @@ texts: self_ref: '#/texts/0' text: Title of the Document - children: [] + content_layer: body label: text orig: 'Author 1 @@ -280,6 +290,7 @@ texts: Affiliation 1' - children: [] + content_layer: body label: text orig: 'Author 2 @@ -292,6 +303,7 @@ texts: Affiliation 2' - children: [] + content_layer: body label: section_header level: 1 orig: 1. Introduction @@ -301,6 +313,7 @@ texts: self_ref: '#/texts/3' text: 1. Introduction - children: [] + content_layer: body label: text orig: This paper introduces the biggest invention ever made. ... parent: @@ -309,6 +322,7 @@ texts: self_ref: '#/texts/4' text: This paper introduces the biggest invention ever made. ... - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -319,6 +333,7 @@ texts: self_ref: '#/texts/5' text: list item 1 - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -329,6 +344,7 @@ texts: self_ref: '#/texts/6' text: list item 2 - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -339,6 +355,7 @@ texts: self_ref: '#/texts/7' text: list item 3 - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -349,6 +366,7 @@ texts: self_ref: '#/texts/8' text: list item 3.a - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -359,6 +377,7 @@ texts: self_ref: '#/texts/9' text: list item 3.b - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -369,6 +388,7 @@ texts: self_ref: '#/texts/10' text: list item 3.c - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -379,6 +399,7 @@ texts: self_ref: '#/texts/11' text: list item 4 - children: [] + content_layer: body label: caption orig: This is the caption of table 1. parent: @@ -387,6 +408,7 @@ texts: self_ref: '#/texts/12' text: This is the caption of table 1. - children: [] + content_layer: body label: caption orig: This is the caption of figure 1. parent: @@ -395,6 +417,7 @@ texts: self_ref: '#/texts/13' text: This is the caption of figure 1. - children: [] + content_layer: body label: caption orig: This is the caption of figure 2. parent: diff --git a/test/data/docling_document/unit/CodeItem.yaml b/test/data/docling_document/unit/CodeItem.yaml index 42979da3..f5238e01 100644 --- a/test/data/docling_document/unit/CodeItem.yaml +++ b/test/data/docling_document/unit/CodeItem.yaml @@ -1,5 +1,6 @@ children: [] code_language: Python +content_layer: body label: code orig: whatever parent: null diff --git a/test/data/docling_document/unit/FloatingItem.yaml b/test/data/docling_document/unit/FloatingItem.yaml index 1a816a85..21beef40 100644 --- a/test/data/docling_document/unit/FloatingItem.yaml +++ b/test/data/docling_document/unit/FloatingItem.yaml @@ -7,3 +7,4 @@ parent: null prov: [] references: [] self_ref: '#' +content_layer: body \ No newline at end of file diff --git a/test/data/docling_document/unit/KeyValueItem.yaml b/test/data/docling_document/unit/KeyValueItem.yaml index 83362f55..1335dc90 100644 --- a/test/data/docling_document/unit/KeyValueItem.yaml +++ b/test/data/docling_document/unit/KeyValueItem.yaml @@ -3,3 +3,4 @@ label: key_value_region parent: null prov: [] self_ref: '#' +content_layer: body \ No newline at end of file diff --git a/test/data/docling_document/unit/ListItem.yaml b/test/data/docling_document/unit/ListItem.yaml index e8cc4ae2..b1bb0ca7 100644 --- a/test/data/docling_document/unit/ListItem.yaml +++ b/test/data/docling_document/unit/ListItem.yaml @@ -6,4 +6,5 @@ orig: whatever parent: null prov: [] self_ref: '#' -text: whatever \ No newline at end of file +text: whatever +content_layer: body \ No newline at end of file diff --git a/test/data/docling_document/unit/PictureItem.yaml b/test/data/docling_document/unit/PictureItem.yaml index 35348c35..ffe342a6 100644 --- a/test/data/docling_document/unit/PictureItem.yaml +++ b/test/data/docling_document/unit/PictureItem.yaml @@ -8,3 +8,4 @@ parent: null prov: [] references: [] self_ref: '#' +content_layer: body \ No newline at end of file diff --git a/test/data/docling_document/unit/SectionHeaderItem.yaml b/test/data/docling_document/unit/SectionHeaderItem.yaml index b4729e3c..7d7d8998 100644 --- a/test/data/docling_document/unit/SectionHeaderItem.yaml +++ b/test/data/docling_document/unit/SectionHeaderItem.yaml @@ -6,3 +6,4 @@ parent: null prov: [] self_ref: '#' text: whatever +content_layer: body \ No newline at end of file diff --git a/test/data/docling_document/unit/TableItem.yaml b/test/data/docling_document/unit/TableItem.yaml index 56e5e155..15d9a07f 100644 --- a/test/data/docling_document/unit/TableItem.yaml +++ b/test/data/docling_document/unit/TableItem.yaml @@ -177,3 +177,4 @@ parent: null prov: [] references: [] self_ref: '#' +content_layer: body \ No newline at end of file diff --git a/test/data/docling_document/unit/TextItem.yaml b/test/data/docling_document/unit/TextItem.yaml index aa56c38d..3a72b953 100644 --- a/test/data/docling_document/unit/TextItem.yaml +++ b/test/data/docling_document/unit/TextItem.yaml @@ -5,3 +5,4 @@ parent: null prov: [] self_ref: '#' text: whatever +content_layer: body \ No newline at end of file From 4cc5aea8eba1f00a3f0e878fdaebf5dd11cf77f4 Mon Sep 17 00:00:00 2001 From: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Date: Mon, 3 Feb 2025 17:08:33 +0100 Subject: [PATCH 02/10] introduce safer data gen mechanism, update chunking test data Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --- README.md | 2 +- test/data/chunker/0_out_chunks.json | 125 ++++++++++++++++++++++++++- test/data/chunker/1_out_chunks.json | 125 ++++++++++++++++++++++++++- test/data/chunker/2a_out_chunks.json | 29 ++++++- test/data/chunker/2b_out_chunks.json | 29 ++++++- test/data/chunker/2c_out_chunks.json | 16 +++- test/test_data_gen_flag.py | 9 ++ test/test_hierarchical_chunker.py | 27 ++++-- test/test_hybrid_chunker.py | 55 ++++++++---- 9 files changed, 387 insertions(+), 30 deletions(-) create mode 100644 test/test_data_gen_flag.py diff --git a/README.md b/README.md index be814dd5..b31fc4d7 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ pip install docling-core To develop for Docling Core, you need Python 3.9 / 3.10 / 3.11 / 3.12 / 3.13 and Poetry. You can then install from your local clone's root dir: ```bash -poetry install +poetry install --all-extras ``` To run the pytest suite, execute: diff --git a/test/data/chunker/0_out_chunks.json b/test/data/chunker/0_out_chunks.json index 750f12a8..94cb63fd 100644 --- a/test/data/chunker/0_out_chunks.json +++ b/test/data/chunker/0_out_chunks.json @@ -12,6 +12,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_header", "prov": [ { @@ -50,6 +51,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -91,6 +93,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -132,6 +135,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -173,6 +177,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -215,6 +220,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -257,6 +263,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -299,6 +306,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -341,6 +349,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -383,6 +392,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -425,6 +435,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -449,6 +460,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -473,6 +485,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -497,6 +510,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -521,6 +535,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -545,6 +560,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -587,6 +603,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -629,6 +646,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -671,6 +689,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "code", "prov": [ { @@ -713,6 +732,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -755,6 +775,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -797,6 +818,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -840,6 +862,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "footnote", "prov": [ { @@ -883,6 +906,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -926,6 +950,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [ { @@ -969,6 +994,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1012,6 +1038,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1055,6 +1082,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1098,6 +1126,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1142,6 +1171,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1186,6 +1216,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1230,6 +1261,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -1274,6 +1306,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1318,6 +1351,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1362,6 +1396,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1406,6 +1441,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1449,6 +1485,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1492,6 +1529,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1535,6 +1573,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1577,6 +1616,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1619,6 +1659,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1661,6 +1702,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -1703,6 +1745,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1745,6 +1788,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [ { @@ -1787,6 +1831,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "table", "prov": [ { @@ -1832,6 +1877,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1874,6 +1920,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1916,6 +1963,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1958,6 +2006,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -1982,6 +2031,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2024,6 +2074,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -2066,6 +2117,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -2108,6 +2160,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2132,6 +2185,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2156,6 +2210,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2180,6 +2235,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2204,6 +2260,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2228,6 +2285,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2252,6 +2310,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2276,6 +2335,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2300,6 +2360,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2324,6 +2385,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2348,6 +2410,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2372,6 +2435,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2396,6 +2460,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2420,6 +2485,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2462,6 +2528,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -2504,6 +2571,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -2546,6 +2614,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -2588,6 +2657,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -2630,6 +2700,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -2672,6 +2743,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -2714,6 +2786,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -2756,6 +2829,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -2798,6 +2872,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -2840,6 +2915,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -2882,6 +2958,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -2924,6 +3001,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -2966,6 +3044,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3008,6 +3087,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3050,6 +3130,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3092,6 +3173,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3134,6 +3216,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3176,6 +3259,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3218,6 +3302,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3260,6 +3345,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3302,6 +3388,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3344,6 +3431,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3386,6 +3474,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [ { @@ -3428,6 +3517,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3470,6 +3560,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3512,6 +3603,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3554,6 +3646,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3596,6 +3689,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -3638,6 +3732,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_header", "prov": [ { @@ -3680,6 +3775,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3722,6 +3818,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [ { @@ -3764,6 +3861,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "table", "prov": [ { @@ -3809,6 +3907,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3851,6 +3950,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3893,6 +3993,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [ { @@ -3935,6 +4036,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3977,6 +4079,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4019,6 +4122,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4061,6 +4165,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4103,6 +4208,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4145,6 +4251,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "table", "prov": [ { @@ -4187,6 +4294,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4229,6 +4337,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4271,6 +4380,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4313,6 +4423,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4355,6 +4466,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4397,6 +4509,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4439,6 +4552,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -4481,6 +4595,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4523,6 +4638,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [ { @@ -4565,6 +4681,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "table", "prov": [ { @@ -4610,6 +4727,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [ { @@ -4652,6 +4770,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4694,6 +4813,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4736,6 +4856,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4778,6 +4899,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [ { @@ -4820,6 +4942,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -4851,4 +4974,4 @@ } } ] -} \ No newline at end of file +} diff --git a/test/data/chunker/1_out_chunks.json b/test/data/chunker/1_out_chunks.json index 2b141c93..a9e55a20 100644 --- a/test/data/chunker/1_out_chunks.json +++ b/test/data/chunker/1_out_chunks.json @@ -12,6 +12,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_header", "prov": [ { @@ -50,6 +51,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -91,6 +93,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -132,6 +135,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -173,6 +177,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -215,6 +220,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -257,6 +263,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -299,6 +306,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -341,6 +349,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -383,6 +392,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -425,6 +435,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -467,6 +478,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -509,6 +521,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -551,6 +564,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -593,6 +607,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -635,6 +650,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -677,6 +693,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -719,6 +736,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -761,6 +779,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "code", "prov": [ { @@ -803,6 +822,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -845,6 +865,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -887,6 +908,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -930,6 +952,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "footnote", "prov": [ { @@ -973,6 +996,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -1016,6 +1040,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [ { @@ -1059,6 +1084,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1102,6 +1128,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1145,6 +1172,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1188,6 +1216,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1232,6 +1261,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1276,6 +1306,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1320,6 +1351,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -1364,6 +1396,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1408,6 +1441,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1452,6 +1486,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1496,6 +1531,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1539,6 +1575,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1582,6 +1619,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1625,6 +1663,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1667,6 +1706,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1709,6 +1749,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1751,6 +1792,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -1793,6 +1835,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1835,6 +1878,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [ { @@ -1877,6 +1921,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "table", "prov": [ { @@ -1922,6 +1967,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1964,6 +2010,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -2006,6 +2053,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -2048,6 +2096,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2090,6 +2139,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2132,6 +2182,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -2174,6 +2225,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -2216,6 +2268,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2258,6 +2311,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2300,6 +2354,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2342,6 +2397,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2384,6 +2440,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2426,6 +2483,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2468,6 +2526,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2510,6 +2569,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2552,6 +2612,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2594,6 +2655,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2636,6 +2698,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2678,6 +2741,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2720,6 +2784,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2762,6 +2827,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "list_item", "prov": [ { @@ -2804,6 +2870,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -2846,6 +2913,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -2888,6 +2956,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -2930,6 +2999,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -2972,6 +3042,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3014,6 +3085,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3056,6 +3128,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3098,6 +3171,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3140,6 +3214,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3182,6 +3257,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3224,6 +3300,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3266,6 +3343,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3308,6 +3386,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3350,6 +3429,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3392,6 +3472,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3434,6 +3515,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3476,6 +3558,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3518,6 +3601,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3560,6 +3644,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3602,6 +3687,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3644,6 +3730,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3686,6 +3773,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3728,6 +3816,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [ { @@ -3770,6 +3859,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3812,6 +3902,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3854,6 +3945,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3896,6 +3988,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -3938,6 +4031,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -3980,6 +4074,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_header", "prov": [ { @@ -4022,6 +4117,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4064,6 +4160,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [ { @@ -4106,6 +4203,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "table", "prov": [ { @@ -4151,6 +4249,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4193,6 +4292,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4235,6 +4335,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [ { @@ -4277,6 +4378,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4319,6 +4421,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4361,6 +4464,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4403,6 +4507,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4445,6 +4550,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4487,6 +4593,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "table", "prov": [ { @@ -4529,6 +4636,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4571,6 +4679,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4613,6 +4722,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4655,6 +4765,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4697,6 +4808,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4739,6 +4851,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4781,6 +4894,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -4823,6 +4937,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -4865,6 +4980,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [ { @@ -4907,6 +5023,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "table", "prov": [ { @@ -4952,6 +5069,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "caption", "prov": [ { @@ -4994,6 +5112,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -5036,6 +5155,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -5078,6 +5198,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -5120,6 +5241,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "paragraph", "prov": [ { @@ -5162,6 +5284,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "page_footer", "prov": [ { @@ -5193,4 +5316,4 @@ } } ] -} \ No newline at end of file +} diff --git a/test/data/chunker/2a_out_chunks.json b/test/data/chunker/2a_out_chunks.json index a0c7099a..278f5cfe 100644 --- a/test/data/chunker/2a_out_chunks.json +++ b/test/data/chunker/2a_out_chunks.json @@ -12,6 +12,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -50,6 +51,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -74,6 +76,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -115,6 +118,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -156,6 +160,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -197,6 +202,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -238,6 +244,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -279,6 +286,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -320,6 +328,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -361,6 +370,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -402,6 +412,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -443,6 +454,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -485,6 +497,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -528,6 +541,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -571,6 +585,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -614,6 +629,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -657,6 +673,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -700,6 +717,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -743,6 +761,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -786,6 +805,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -829,6 +849,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -872,6 +893,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -915,6 +937,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -958,6 +981,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -982,6 +1006,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "table", "prov": [ { @@ -1025,6 +1050,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "table", "prov": [ { @@ -1049,6 +1075,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1081,4 +1108,4 @@ } } ] -} \ No newline at end of file +} diff --git a/test/data/chunker/2b_out_chunks.json b/test/data/chunker/2b_out_chunks.json index 31f3fba6..014f6530 100644 --- a/test/data/chunker/2b_out_chunks.json +++ b/test/data/chunker/2b_out_chunks.json @@ -12,6 +12,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -50,6 +51,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -91,6 +93,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -132,6 +135,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -173,6 +177,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -214,6 +219,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -255,6 +261,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -296,6 +303,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -337,6 +345,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -378,6 +387,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -419,6 +429,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -460,6 +471,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -502,6 +514,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -545,6 +558,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -588,6 +602,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -631,6 +646,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -674,6 +690,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -717,6 +734,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -760,6 +778,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -803,6 +822,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -846,6 +866,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -889,6 +910,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -932,6 +954,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -975,6 +998,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1018,6 +1042,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "table", "prov": [ { @@ -1061,6 +1086,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "table", "prov": [ { @@ -1104,6 +1130,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -1136,4 +1163,4 @@ } } ] -} \ No newline at end of file +} diff --git a/test/data/chunker/2c_out_chunks.json b/test/data/chunker/2c_out_chunks.json index 4788e405..900564a8 100644 --- a/test/data/chunker/2c_out_chunks.json +++ b/test/data/chunker/2c_out_chunks.json @@ -12,6 +12,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -50,6 +51,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -74,6 +76,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -98,6 +101,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -122,6 +126,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -146,6 +151,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -170,6 +176,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -211,6 +218,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -253,6 +261,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -296,6 +305,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -339,6 +349,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -382,6 +393,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -406,6 +418,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "table", "prov": [ { @@ -430,6 +443,7 @@ "$ref": "#/body" }, "children": [], + "content_layer": "body", "label": "text", "prov": [ { @@ -462,4 +476,4 @@ } } ] -} \ No newline at end of file +} diff --git a/test/test_data_gen_flag.py b/test/test_data_gen_flag.py new file mode 100644 index 00000000..a4baff66 --- /dev/null +++ b/test/test_data_gen_flag.py @@ -0,0 +1,9 @@ +import os + +from pydantic import TypeAdapter + +GEN_TEST_DATA = TypeAdapter(bool).validate_python(os.getenv("DOCLING_GEN_TEST_DATA", 0)) + + +def test_gen_test_data_flag(): + assert not GEN_TEST_DATA diff --git a/test/test_hierarchical_chunker.py b/test/test_hierarchical_chunker.py index ec98dc71..a671155d 100644 --- a/test/test_hierarchical_chunker.py +++ b/test/test_hierarchical_chunker.py @@ -9,6 +9,19 @@ from docling_core.transforms.chunker.hierarchical_chunker import DocChunk from docling_core.types.doc import DoclingDocument as DLDocument +from .test_data_gen_flag import GEN_TEST_DATA + + +def _process(act_data, exp_path_str): + if GEN_TEST_DATA: + with open(exp_path_str, mode="w", encoding="utf-8") as f: + json.dump(act_data, fp=f, indent=4) + f.write("\n") + else: + with open(exp_path_str, encoding="utf-8") as f: + exp_data = json.load(fp=f) + assert exp_data == act_data + def test_chunk_merge_list_items(): with open("test/data/chunker/0_inp_dl_doc.json", encoding="utf-8") as f: @@ -21,9 +34,10 @@ def test_chunk_merge_list_items(): act_data = dict( root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] ) - with open("test/data/chunker/0_out_chunks.json", encoding="utf-8") as f: - exp_data = json.load(fp=f) - assert exp_data == act_data + _process( + act_data=act_data, + exp_path_str="test/data/chunker/0_out_chunks.json", + ) def test_chunk_no_merge_list_items(): @@ -37,6 +51,7 @@ def test_chunk_no_merge_list_items(): act_data = dict( root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] ) - with open("test/data/chunker/1_out_chunks.json", encoding="utf-8") as f: - exp_data = json.load(fp=f) - assert exp_data == act_data + _process( + act_data=act_data, + exp_path_str="test/data/chunker/1_out_chunks.json", + ) diff --git a/test/test_hybrid_chunker.py b/test/test_hybrid_chunker.py index 6a6e8e2e..adea090c 100644 --- a/test/test_hybrid_chunker.py +++ b/test/test_hybrid_chunker.py @@ -11,6 +11,8 @@ from docling_core.transforms.chunker.hybrid_chunker import HybridChunker from docling_core.types.doc import DoclingDocument as DLDocument +from .test_data_gen_flag import GEN_TEST_DATA + EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2" MAX_TOKENS = 64 INPUT_FILE = "test/data/chunker/2_inp_dl_doc.json" @@ -18,6 +20,17 @@ TOKENIZER = AutoTokenizer.from_pretrained(EMBED_MODEL_ID) +def _process(act_data, exp_path_str): + if GEN_TEST_DATA: + with open(exp_path_str, mode="w", encoding="utf-8") as f: + json.dump(act_data, fp=f, indent=4) + f.write("\n") + else: + with open(exp_path_str, encoding="utf-8") as f: + exp_data = json.load(fp=f) + assert exp_data == act_data + + def test_chunk_merge_peers(): EXPECTED_OUT_FILE = "test/data/chunker/2a_out_chunks.json" @@ -36,9 +49,10 @@ def test_chunk_merge_peers(): act_data = dict( root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] ) - with open(EXPECTED_OUT_FILE, encoding="utf-8") as f: - exp_data = json.load(fp=f) - assert exp_data == act_data + _process( + act_data=act_data, + exp_path_str=EXPECTED_OUT_FILE, + ) def test_chunk_no_merge_peers(): @@ -58,9 +72,10 @@ def test_chunk_no_merge_peers(): act_data = dict( root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] ) - with open(EXPECTED_OUT_FILE, encoding="utf-8") as f: - exp_data = json.load(fp=f) - assert exp_data == act_data + _process( + act_data=act_data, + exp_path_str=EXPECTED_OUT_FILE, + ) def test_serialize(): @@ -88,9 +103,10 @@ def test_serialize(): for chunk in chunks ] ) - with open(EXPECTED_OUT_FILE, encoding="utf-8") as f: - exp_data = json.load(fp=f) - assert exp_data == act_data + _process( + act_data=act_data, + exp_path_str=EXPECTED_OUT_FILE, + ) def test_chunk_with_model_name(): @@ -111,9 +127,10 @@ def test_chunk_with_model_name(): act_data = dict( root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] ) - with open(EXPECTED_OUT_FILE, encoding="utf-8") as f: - exp_data = json.load(fp=f) - assert exp_data == act_data + _process( + act_data=act_data, + exp_path_str=EXPECTED_OUT_FILE, + ) def test_chunk_default(): @@ -130,9 +147,10 @@ def test_chunk_default(): act_data = dict( root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] ) - with open(EXPECTED_OUT_FILE, encoding="utf-8") as f: - exp_data = json.load(fp=f) - assert exp_data == act_data + _process( + act_data=act_data, + exp_path_str=EXPECTED_OUT_FILE, + ) def test_serialize_altered_delim(): @@ -158,6 +176,7 @@ def test_serialize_altered_delim(): for chunk in chunks ] ) - with open(EXPECTED_OUT_FILE, encoding="utf-8") as f: - exp_data = json.load(fp=f) - assert exp_data == act_data + _process( + act_data=act_data, + exp_path_str=EXPECTED_OUT_FILE, + ) From 66908e3ab39090c9d727609b015a6dc756eea08d Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Tue, 4 Feb 2025 10:13:05 +0100 Subject: [PATCH 03/10] Do not make test rely on order in yaml Signed-off-by: Christoph Auer --- test/test_docling_doc.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_docling_doc.py b/test/test_docling_doc.py index e7bc1412..286f73b7 100644 --- a/test/test_docling_doc.py +++ b/test/test_docling_doc.py @@ -195,12 +195,14 @@ def read(name: str): f"./test/data/docling_document/unit/{name}.yaml", "r", encoding="utf-8" ) as fr: gold = fr.read() - return gold + return yaml.safe_load(gold) def verify(dc, obj): pred = serialise(obj).strip() + pred = yaml.safe_load(pred) + # print(f"\t{dc.__name__}:\n {pred}") - gold = read(dc.__name__).strip() + gold = read(dc.__name__) assert pred == gold, f"pred!=gold for {dc.__name__}" From efa33e940870980ebf6b064f88e9bb565c442a23 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Tue, 4 Feb 2025 10:15:48 +0100 Subject: [PATCH 04/10] chore: format fixes Signed-off-by: Christoph Auer --- docs/DoclingDocument.json | 46 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/docs/DoclingDocument.json b/docs/DoclingDocument.json index 1515b730..24defc2f 100644 --- a/docs/DoclingDocument.json +++ b/docs/DoclingDocument.json @@ -190,6 +190,10 @@ "title": "Children", "type": "array" }, + "content_layer": { + "$ref": "#/$defs/ContentLayer", + "default": "body" + }, "label": { "const": "code", "default": "code", @@ -289,6 +293,15 @@ "title": "CodeLanguageLabel", "type": "string" }, + "ContentLayer": { + "description": "ContentLayer.", + "enum": [ + "body", + "furniture" + ], + "title": "ContentLayer", + "type": "string" + }, "CoordOrigin": { "description": "CoordOrigin.", "enum": [ @@ -366,6 +379,10 @@ "title": "Children", "type": "array" }, + "content_layer": { + "$ref": "#/$defs/ContentLayer", + "default": "body" + }, "name": { "default": "group", "title": "Name", @@ -465,6 +482,10 @@ "title": "Children", "type": "array" }, + "content_layer": { + "$ref": "#/$defs/ContentLayer", + "default": "body" + }, "label": { "const": "key_value_region", "default": "key_value_region", @@ -514,6 +535,10 @@ "title": "Children", "type": "array" }, + "content_layer": { + "$ref": "#/$defs/ContentLayer", + "default": "body" + }, "label": { "const": "list_item", "default": "list_item", @@ -722,6 +747,10 @@ "title": "Children", "type": "array" }, + "content_layer": { + "$ref": "#/$defs/ContentLayer", + "default": "body" + }, "label": { "const": "picture", "default": "picture", @@ -1119,6 +1148,10 @@ "title": "Children", "type": "array" }, + "content_layer": { + "$ref": "#/$defs/ContentLayer", + "default": "body" + }, "label": { "const": "section_header", "default": "section_header", @@ -1297,6 +1330,10 @@ "title": "Children", "type": "array" }, + "content_layer": { + "$ref": "#/$defs/ContentLayer", + "default": "body" + }, "label": { "default": "table", "enum": [ @@ -1388,6 +1425,10 @@ "title": "Children", "type": "array" }, + "content_layer": { + "$ref": "#/$defs/ContentLayer", + "default": "body" + }, "label": { "enum": [ "caption", @@ -1467,9 +1508,11 @@ "self_ref": "#/furniture", "parent": null, "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" - } + }, + "deprecated": true }, "body": { "$ref": "#/$defs/GroupItem", @@ -1477,6 +1520,7 @@ "self_ref": "#/body", "parent": null, "children": [], + "content_layer": "body", "name": "_root_", "label": "unspecified" } From 4c19ae7f7fb128a12173c50a9ab376114f70a689 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Tue, 4 Feb 2025 10:29:30 +0100 Subject: [PATCH 05/10] fix: legacy_to_docling_doc must use content_layer Signed-off-by: Christoph Auer --- docling_core/types/doc/document.py | 2 +- docling_core/utils/legacy.py | 6 +- docs/DoclingDocument.json | 2 +- .../data/doc/constructed_doc.embedded.json.gt | 2 +- .../data/doc/constructed_doc.embedded.yaml.gt | 2 +- .../doc/constructed_doc.referenced.json.gt | 2 +- .../doc/constructed_doc.referenced.yaml.gt | 2 +- .../legacy_doc/doc-export.docling.yaml.gt | 138 ++++++++++++++++++ 8 files changed, 147 insertions(+), 9 deletions(-) diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index 165ac707..60ab3f36 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -1430,7 +1430,7 @@ class DoclingDocument(BaseModel): ) furniture: Annotated[GroupItem, Field(deprecated=True)] = GroupItem( - name="_root_", self_ref="#/furniture" + name="_root_", self_ref="#/furniture", content_layer=ContentLayer.FURNITURE ) # List[RefItem] = [] body: GroupItem = GroupItem(name="_root_", self_ref="#/body") # List[RefItem] = [] diff --git a/docling_core/utils/legacy.py b/docling_core/utils/legacy.py index 7d08a876..74f91867 100644 --- a/docling_core/utils/legacy.py +++ b/docling_core/utils/legacy.py @@ -25,7 +25,7 @@ TableItem, TextItem, ) -from docling_core.types.doc.document import GroupItem, ListItem, TableData +from docling_core.types.doc.document import ContentLayer, GroupItem, ListItem, TableData from docling_core.types.doc.labels import GroupLabel from docling_core.types.legacy_doc.base import ( BaseCell, @@ -400,7 +400,7 @@ def _transform_prov(item: BaseCell) -> Optional[ProvenanceItem]: doc.add_text( label=DocItemLabel.PAGE_HEADER, text=text_item.text, - parent=doc.furniture, + content_layer=ContentLayer.FURNITURE, ) # page footers @@ -412,7 +412,7 @@ def _transform_prov(item: BaseCell) -> Optional[ProvenanceItem]: doc.add_text( label=DocItemLabel.PAGE_FOOTER, text=text_item.text, - parent=doc.furniture, + content_layer=ContentLayer.FURNITURE, ) # footnotes diff --git a/docs/DoclingDocument.json b/docs/DoclingDocument.json index 24defc2f..50046ee8 100644 --- a/docs/DoclingDocument.json +++ b/docs/DoclingDocument.json @@ -1508,7 +1508,7 @@ "self_ref": "#/furniture", "parent": null, "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "_root_", "label": "unspecified" }, diff --git a/test/data/doc/constructed_doc.embedded.json.gt b/test/data/doc/constructed_doc.embedded.json.gt index b6602d32..17e84c90 100644 --- a/test/data/doc/constructed_doc.embedded.json.gt +++ b/test/data/doc/constructed_doc.embedded.json.gt @@ -5,7 +5,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "_root_", "label": "unspecified" }, diff --git a/test/data/doc/constructed_doc.embedded.yaml.gt b/test/data/doc/constructed_doc.embedded.yaml.gt index 009768fb..2ad6fdd5 100644 --- a/test/data/doc/constructed_doc.embedded.yaml.gt +++ b/test/data/doc/constructed_doc.embedded.yaml.gt @@ -14,7 +14,7 @@ body: self_ref: '#/body' furniture: children: [] - content_layer: body + content_layer: furniture label: unspecified name: _root_ self_ref: '#/furniture' diff --git a/test/data/doc/constructed_doc.referenced.json.gt b/test/data/doc/constructed_doc.referenced.json.gt index 77c1fb0a..ec98a42e 100644 --- a/test/data/doc/constructed_doc.referenced.json.gt +++ b/test/data/doc/constructed_doc.referenced.json.gt @@ -5,7 +5,7 @@ "furniture": { "self_ref": "#/furniture", "children": [], - "content_layer": "body", + "content_layer": "furniture", "name": "_root_", "label": "unspecified" }, diff --git a/test/data/doc/constructed_doc.referenced.yaml.gt b/test/data/doc/constructed_doc.referenced.yaml.gt index 55cf4c2d..2afbbbb6 100644 --- a/test/data/doc/constructed_doc.referenced.yaml.gt +++ b/test/data/doc/constructed_doc.referenced.yaml.gt @@ -14,7 +14,7 @@ body: self_ref: '#/body' furniture: children: [] - content_layer: body + content_layer: furniture label: unspecified name: _root_ self_ref: '#/furniture' diff --git a/test/data/legacy_doc/doc-export.docling.yaml.gt b/test/data/legacy_doc/doc-export.docling.yaml.gt index 26a5c04a..6dbf5c63 100644 --- a/test/data/legacy_doc/doc-export.docling.yaml.gt +++ b/test/data/legacy_doc/doc-export.docling.yaml.gt @@ -124,11 +124,13 @@ body: - $ref: '#/texts/121' - $ref: '#/texts/122' - $ref: '#/texts/123' + content_layer: body label: unspecified name: _root_ self_ref: '#/body' furniture: children: [] + content_layer: furniture label: unspecified name: _root_ self_ref: '#/furniture' @@ -136,6 +138,7 @@ groups: - children: - $ref: '#/texts/46' - $ref: '#/texts/47' + content_layer: body label: list name: list parent: @@ -143,6 +146,7 @@ groups: self_ref: '#/groups/0' - children: - $ref: '#/texts/58' + content_layer: body label: list name: list parent: @@ -150,6 +154,7 @@ groups: self_ref: '#/groups/1' - children: - $ref: '#/texts/98' + content_layer: body label: list name: list parent: @@ -157,6 +162,7 @@ groups: self_ref: '#/groups/2' - children: - $ref: '#/texts/120' + content_layer: body label: list name: list parent: @@ -243,6 +249,7 @@ pictures: - annotations: [] captions: [] children: [] + content_layer: body footnotes: [] label: picture parent: @@ -265,6 +272,7 @@ pictures: - $ref: '#/texts/30' children: - $ref: '#/texts/30' + content_layer: body footnotes: [] label: picture parent: @@ -287,6 +295,7 @@ pictures: - $ref: '#/texts/51' children: - $ref: '#/texts/51' + content_layer: body footnotes: [] label: picture parent: @@ -309,6 +318,7 @@ pictures: - $ref: '#/texts/69' children: - $ref: '#/texts/69' + content_layer: body footnotes: [] label: picture parent: @@ -331,6 +341,7 @@ pictures: - $ref: '#/texts/85' children: - $ref: '#/texts/85' + content_layer: body footnotes: [] label: picture parent: @@ -353,6 +364,7 @@ pictures: - $ref: '#/texts/90' children: - $ref: '#/texts/90' + content_layer: body footnotes: [] label: picture parent: @@ -376,6 +388,7 @@ tables: - $ref: '#/texts/76' children: - $ref: '#/texts/76' + content_layer: body data: grid: - - bbox: @@ -1882,6 +1895,7 @@ tables: - $ref: '#/texts/82' children: - $ref: '#/texts/82' + content_layer: body data: grid: - - bbox: @@ -3582,6 +3596,7 @@ tables: self_ref: '#/tables/1' texts: - children: [] + content_layer: body label: page_header orig: arXiv:2305.03393v1 [cs.CV] 5 May 2023 parent: @@ -3600,6 +3615,7 @@ texts: self_ref: '#/texts/0' text: arXiv:2305.03393v1 [cs.CV] 5 May 2023 - children: [] + content_layer: body label: section_header level: 1 orig: Optimized Table Tokenization for Table Structure Recognition @@ -3619,6 +3635,7 @@ texts: self_ref: '#/texts/1' text: Optimized Table Tokenization for Table Structure Recognition - children: [] + content_layer: body label: text orig: Maksym Lysak [0000-0002-3723-$^{6960]}$, Ahmed Nassar[0000-0002-9468-$^{0822]}$, Nikolaos Livathinos [0000-0001-8513-$^{3491]}$, Christoph Auer[0000-0001-5761-$^{0422]}$, @@ -3641,6 +3658,7 @@ texts: Nikolaos Livathinos [0000-0001-8513-$^{3491]}$, Christoph Auer[0000-0001-5761-$^{0422]}$, and Peter Staar [0000-0002-8088-0823] - children: [] + content_layer: body label: text orig: IBM Research {mly,ahn,nli,cau,taa}@zurich.ibm.com parent: @@ -3659,6 +3677,7 @@ texts: self_ref: '#/texts/3' text: IBM Research {mly,ahn,nli,cau,taa}@zurich.ibm.com - children: [] + content_layer: body label: text orig: Abstract. Extracting tables from documents is a crucial task in any document conversion pipeline. Recently, transformer-based models have demonstrated that @@ -3705,6 +3724,7 @@ texts: in turn eliminates most post-processing needs. Popular table structure data-sets will be published in OTSL format to the community. - children: [] + content_layer: body label: text orig: "Keywords: Table Structure Recognition \xB7 Data Representation \xB7 Transformers\ \ \xB7 Optimization." @@ -3725,6 +3745,7 @@ texts: text: "Keywords: Table Structure Recognition \xB7 Data Representation \xB7 Transformers\ \ \xB7 Optimization." - children: [] + content_layer: body label: section_header level: 1 orig: 1 Introduction @@ -3744,6 +3765,7 @@ texts: self_ref: '#/texts/6' text: 1 Introduction - children: [] + content_layer: body label: text orig: Tables are ubiquitous in documents such as scientific papers, patents, reports, manuals, specification sheets or marketing material. They often encode highly @@ -3772,6 +3794,7 @@ texts: difficult to recover their correct structure with simple analytical methods. Therefore, accurate table extraction is achieved these days with machine-learning based methods. - children: [] + content_layer: body label: text orig: In modern document understanding systems [1,15], table extraction is typically a two-step process. Firstly, every table on a page is located with a bounding @@ -3794,6 +3817,7 @@ texts: a two-step process. Firstly, every table on a page is located with a bounding box, and secondly, their logical row and column structure is recognized. As of - children: [] + content_layer: body label: page_header orig: '2' parent: @@ -3812,6 +3836,7 @@ texts: self_ref: '#/texts/9' text: '2' - children: [] + content_layer: body label: page_header orig: M. Lysak, et al. parent: @@ -3830,6 +3855,7 @@ texts: self_ref: '#/texts/10' text: M. Lysak, et al. - children: [] + content_layer: body label: text orig: 'Fig. 1. Comparison between HTML and OTSL table structure representation: (A) table-example with complex row and column headers, including a 2D empty span, @@ -3862,6 +3888,7 @@ texts: structure (variable token sequence length per row in HTML versus a fixed length of rows in OTSL).' - children: [] + content_layer: body label: text orig: today, table detection in documents is a well understood problem, and the latest state-of-the-art (SOTA) object detection methods provide an accuracy comparable @@ -3888,6 +3915,7 @@ texts: recognition (TSR) is a lot more challenging and remains a very active area of research, in which many novel machine learning algorithms are being explored [3,4,5,9,11,12,13,14,17,18,21,22]. - children: [] + content_layer: body label: text orig: Recently emerging SOTA methods for table structure recognition employ transformer-based models, in which an image of the table is provided to the network in order to @@ -3928,6 +3956,7 @@ texts: Central), popularized primarily the use of HTML as ground-truth representation format for TSR. - children: [] + content_layer: body label: page_header orig: Optimized Table Tokenization for Table Structure Recognition parent: @@ -3946,6 +3975,7 @@ texts: self_ref: '#/texts/14' text: Optimized Table Tokenization for Table Structure Recognition - children: [] + content_layer: body label: page_header orig: '3' parent: @@ -3964,6 +3994,7 @@ texts: self_ref: '#/texts/15' text: '3' - children: [] + content_layer: body label: text orig: While the majority of research in TSR is currently focused on the development and application of novel neural model architectures, the table structure representation @@ -3996,6 +4027,7 @@ texts: state-of-the-art Im2Seq model is TableFormer [9], we select this model to perform our experiments. - children: [] + content_layer: body label: text orig: The main contribution of this paper is the introduction of a new optimised table structure language (OTSL), specifically designed to describe table-structure @@ -4030,6 +4062,7 @@ texts: syntax validation during inference and ensures a syntactically correct table-structure. These OTSL features are illustrated in Figure 1, in comparison to HTML. - children: [] + content_layer: body label: text orig: The paper is structured as follows. In section 2, we give an overview of the latest developments in table-structure reconstruction. In section 3 we review @@ -4062,6 +4095,7 @@ texts: on HTML and ultimately demonstrate the advantages of using OTSL. Finally, in section 6 we conclude our work and outline next potential steps. - children: [] + content_layer: body label: section_header level: 1 orig: 2 Related Work @@ -4081,6 +4115,7 @@ texts: self_ref: '#/texts/19' text: 2 Related Work - children: [] + content_layer: body label: text orig: 'Approaches to formalize the logical structure and layout of tables in electronic documents date back more than two decades [16]. In the recent past, a wide variety @@ -4127,6 +4162,7 @@ texts: the relationship between the nodes, e.g. if they belong to the same column, row, or table cell.' - children: [] + content_layer: body label: page_header orig: 4 M. Lysak, et al. parent: @@ -4145,6 +4181,7 @@ texts: self_ref: '#/texts/21' text: 4 M. Lysak, et al. - children: [] + content_layer: body label: text orig: Other work [20] aims at predicting a grid for each table and deciding which cells must be merged using an attention network. Im2Seq methods cast the problem @@ -4185,6 +4222,7 @@ texts: sequence, some post-processing needs to be performed to ensure a syntactically valid (let alone correct) sequence. - children: [] + content_layer: body label: text orig: Within the Im2Seq method, we find several popular models, namely the encoder-dual-decoder model (EDD) [22], TableFormer [9], Tabsplitter[2] and Ye et. al. [19]. EDD uses @@ -4235,6 +4273,7 @@ texts: using bidirectional LSTMs to predict LaTeX code. Kayal [5] introduces Gated ResNet transformers to predict LaTeX code, and a separate OCR module to extract content. - children: [] + content_layer: body label: text orig: Im2Seq approaches have shown to be well-suited for the TSR task and allow a full end-to-end network design that can output the final table structure without @@ -4267,6 +4306,7 @@ texts: this is a necessary step before further improving neural network architectures for this task. - children: [] + content_layer: body label: section_header level: 1 orig: 3 Problem Statement @@ -4286,6 +4326,7 @@ texts: self_ref: '#/texts/25' text: 3 Problem Statement - children: [] + content_layer: body label: text orig: All known Im2Seq based models for TSR fundamentally work in similar ways. Given an image of a table, the Im2Seq model predicts the structure of the table @@ -4308,6 +4349,7 @@ texts: Given an image of a table, the Im2Seq model predicts the structure of the table by generating a sequence of tokens. These tokens originate from a finite vocab- - children: [] + content_layer: body label: page_header orig: Optimized Table Tokenization for Table Structure Recognition parent: @@ -4326,6 +4368,7 @@ texts: self_ref: '#/texts/27' text: Optimized Table Tokenization for Table Structure Recognition - children: [] + content_layer: body label: page_header orig: '5' parent: @@ -4344,6 +4387,7 @@ texts: self_ref: '#/texts/28' text: '5' - children: [] + content_layer: body label: text orig: ulary and can be interpreted as a table structure. For example, with the HTML tokens ,
, , , and , one can construct simple @@ -4372,6 +4416,7 @@ texts: documents [21,22], due to a variety of spanning cells definitions in the HTML token vocabulary. - children: [] + content_layer: body label: caption orig: Fig. 2. Frequency of tokens in HTML and OTSL as they appear in PubTabNet. parent: @@ -4390,6 +4435,7 @@ texts: self_ref: '#/texts/30' text: Fig. 2. Frequency of tokens in HTML and OTSL as they appear in PubTabNet. - children: [] + content_layer: body label: text orig: Obviously, HTML and other general-purpose markup languages were not designed for Im2Seq models. As such, they have some serious drawbacks. First, the token @@ -4432,6 +4478,7 @@ texts: of tokens in combination with variable token row-length makes it challenging for models to learn the HTML structure. - children: [] + content_layer: body label: text orig: Additionally, it would be desirable if the representation would easily allow an early detection of invalid sequences on-the-go, before the prediction of the @@ -4456,6 +4503,7 @@ texts: entire table structure is completed. HTML is not well-suited for this purpose as the verification of incomplete sequences is non-trivial or even impossible. - children: [] + content_layer: body label: text orig: In a valid HTML table, the token sequence must describe a 2D grid of table cells, serialised in row-major ordering, where each row and each column have the @@ -4486,6 +4534,7 @@ texts: especially for large tables with many row-and column-spans, it is complex to verify the consistency of predicted structures during sequence - children: [] + content_layer: body label: page_header orig: '6' parent: @@ -4504,6 +4553,7 @@ texts: self_ref: '#/texts/34' text: '6' - children: [] + content_layer: body label: page_header orig: M. Lysak, et al. parent: @@ -4522,6 +4572,7 @@ texts: self_ref: '#/texts/35' text: M. Lysak, et al. - children: [] + content_layer: body label: text orig: generation. Implicitly, this also means that Im2Seq models need to learn these complex syntax rules, simply to deliver valid output. @@ -4542,6 +4593,7 @@ texts: text: generation. Implicitly, this also means that Im2Seq models need to learn these complex syntax rules, simply to deliver valid output. - children: [] + content_layer: body label: text orig: In practice, we observe two major issues with prediction quality when training Im2Seq models on HTML table structure generation from images. On the one hand, @@ -4582,6 +4634,7 @@ texts: model performance, since they reflect not only in the task of pure structure recognition but also in the equally crucial recognition or matching of table cell content. - children: [] + content_layer: body label: section_header level: 1 orig: 4 Optimised Table Structure Language @@ -4601,6 +4654,7 @@ texts: self_ref: '#/texts/38' text: 4 Optimised Table Structure Language - children: [] + content_layer: body label: text orig: To mitigate the issues with HTML in Im2Seq-based TSR models laid out before, we propose here our Optimised Table Structure Language (OTSL). OTSL is designed @@ -4631,6 +4685,7 @@ texts: demonstrate how the compact structure representation and minimized sequence length improves prediction accuracy and inference time in the TableFormer architecture. - children: [] + content_layer: body label: section_header level: 1 orig: 4.1 Language Definition @@ -4650,6 +4705,7 @@ texts: self_ref: '#/texts/40' text: 4.1 Language Definition - children: [] + content_layer: body label: text orig: In Figure 3, we illustrate how the OTSL is defined. In essence, the OTSL defines only 5 tokens that directly describe a tabular structure based on an atomic 2D @@ -4672,6 +4728,7 @@ texts: only 5 tokens that directly describe a tabular structure based on an atomic 2D grid. - children: [] + content_layer: body label: text orig: 'The OTSL vocabulary is comprised of the following tokens:' parent: @@ -4690,6 +4747,7 @@ texts: self_ref: '#/texts/42' text: 'The OTSL vocabulary is comprised of the following tokens:' - children: [] + content_layer: body label: text orig: -'C ' cell a new table cell that either has or does not have cell content parent: @@ -4708,6 +4766,7 @@ texts: self_ref: '#/texts/43' text: -'C ' cell a new table cell that either has or does not have cell content - children: [] + content_layer: body label: text orig: -'L ' cell left-looking cell, merging with the left neighbor cell to create a span @@ -4728,6 +4787,7 @@ texts: text: -'L ' cell left-looking cell, merging with the left neighbor cell to create a span - children: [] + content_layer: body label: text orig: -'U ' cell up-looking cell, merging with the upper neighbor cell to create a span @@ -4748,6 +4808,7 @@ texts: text: -'U ' cell up-looking cell, merging with the upper neighbor cell to create a span - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -4768,6 +4829,7 @@ texts: self_ref: '#/texts/46' text: -'X ' cell cross cell, to merge with both left and upper neighbor cells - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -4788,6 +4850,7 @@ texts: self_ref: '#/texts/47' text: -'NL ' new-line, switch to the next row. - children: [] + content_layer: body label: text orig: A notable attribute of OTSL is that it has the capability of achieving lossless conversion to HTML. @@ -4808,6 +4871,7 @@ texts: text: A notable attribute of OTSL is that it has the capability of achieving lossless conversion to HTML. - children: [] + content_layer: body label: page_header orig: Optimized Table Tokenization for Table Structure Recognition parent: @@ -4826,6 +4890,7 @@ texts: self_ref: '#/texts/49' text: Optimized Table Tokenization for Table Structure Recognition - children: [] + content_layer: body label: page_header orig: '7' parent: @@ -4844,6 +4909,7 @@ texts: self_ref: '#/texts/50' text: '7' - children: [] + content_layer: body label: caption orig: 'Fig. 3. OTSL description of table structure: A-table example; B-graphical representation of table structure; C-mapping structure on a grid; D-OTSL structure @@ -4866,6 +4932,7 @@ texts: representation of table structure; C-mapping structure on a grid; D-OTSL structure encoding; E-explanation on cell encoding' - children: [] + content_layer: body label: section_header level: 1 orig: 4.2 Language Syntax @@ -4885,6 +4952,7 @@ texts: self_ref: '#/texts/52' text: 4.2 Language Syntax - children: [] + content_layer: body label: text orig: 'The OTSL representation follows these syntax rules:' parent: @@ -4903,6 +4971,7 @@ texts: self_ref: '#/texts/53' text: 'The OTSL representation follows these syntax rules:' - children: [] + content_layer: body label: text orig: '1. Left-looking cell rule : The left neighbour of an ''L '' cell must be either another ''L '' cell or a ''C '' cell.' @@ -4923,6 +4992,7 @@ texts: text: '1. Left-looking cell rule : The left neighbour of an ''L '' cell must be either another ''L '' cell or a ''C '' cell.' - children: [] + content_layer: body label: text orig: '2. Up-looking cell rule : The upper neighbour of a ''U '' cell must be either another ''U '' cell or a ''C '' cell.' @@ -4943,6 +5013,7 @@ texts: text: '2. Up-looking cell rule : The upper neighbour of a ''U '' cell must be either another ''U '' cell or a ''C '' cell.' - children: [] + content_layer: body label: text orig: '3. Cross cell rule : The left neighbour of an ''X '' cell must be either another ''X '' cell or a ''U '' cell, and the upper neighbour of an ''X '' cell @@ -4965,6 +5036,7 @@ texts: another ''X '' cell or a ''U '' cell, and the upper neighbour of an ''X '' cell must be either another ''X '' cell or an ''L '' cell.' - children: [] + content_layer: body label: text orig: '4. First row rule : Only ''L '' cells and ''C '' cells are allowed in the first row.' @@ -4985,6 +5057,7 @@ texts: text: '4. First row rule : Only ''L '' cells and ''C '' cells are allowed in the first row.' - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -5007,6 +5080,7 @@ texts: text: '5. First column rule : Only ''U '' cells and ''C '' cells are allowed in the first column.' - children: [] + content_layer: body label: text orig: '6. Rectangular rule : The table representation is always rectangular-all rows must have an equal number of tokens, terminated with ''NL '' token.' @@ -5027,6 +5101,7 @@ texts: text: '6. Rectangular rule : The table representation is always rectangular-all rows must have an equal number of tokens, terminated with ''NL '' token.' - children: [] + content_layer: body label: text orig: 'The application of these rules gives OTSL a set of unique properties. First of all, the OTSL enforces a strictly rectangular structure representation, where @@ -5065,6 +5140,7 @@ texts: generation by looking at the previously predicted sequence. As such, OTSL can guarantee that every predicted sequence is syntactically valid.' - children: [] + content_layer: body label: text orig: These characteristics can be easily learned by sequence generator networks, as we demonstrate further below. We find strong indications that this pattern @@ -5085,6 +5161,7 @@ texts: text: These characteristics can be easily learned by sequence generator networks, as we demonstrate further below. We find strong indications that this pattern - children: [] + content_layer: body label: page_header orig: '8' parent: @@ -5103,6 +5180,7 @@ texts: self_ref: '#/texts/62' text: '8' - children: [] + content_layer: body label: page_header orig: M. Lysak, et al. parent: @@ -5121,6 +5199,7 @@ texts: self_ref: '#/texts/63' text: M. Lysak, et al. - children: [] + content_layer: body label: text orig: reduces significantly the column drift seen in the HTML based models (see Figure 5). @@ -5141,6 +5220,7 @@ texts: text: reduces significantly the column drift seen in the HTML based models (see Figure 5). - children: [] + content_layer: body label: section_header level: 1 orig: 4.3 Error-detection and-mitigation @@ -5160,6 +5240,7 @@ texts: self_ref: '#/texts/65' text: 4.3 Error-detection and-mitigation - children: [] + content_layer: body label: text orig: The design of OTSL allows to validate a table structure easily on an unfinished sequence. The detection of an invalid sequence token is a clear indication of @@ -5196,6 +5277,7 @@ texts: prediction confidence invalidates the predicted sequence, and replace it by the token with the next highest confidence until OTSL rules are satisfied. - children: [] + content_layer: body label: section_header level: 1 orig: 5 Experiments @@ -5215,6 +5297,7 @@ texts: self_ref: '#/texts/67' text: 5 Experiments - children: [] + content_layer: body label: text orig: 'To evaluate the impact of OTSL on prediction accuracy and inference times, we conducted a series of experiments based on the TableFormer model (Figure 4) @@ -5249,6 +5332,7 @@ texts: truth (GT) from all data sets has been converted into OTSL format for this purpose, and will be made publicly available.' - children: [] + content_layer: body label: caption orig: Fig. 4. Architecture sketch of the TableFormer model, which is a representative for the Im2Seq approach. @@ -5269,6 +5353,7 @@ texts: text: Fig. 4. Architecture sketch of the TableFormer model, which is a representative for the Im2Seq approach. - children: [] + content_layer: body label: text orig: We rely on standard metrics such as Tree Edit Distance score (TEDs) for table structure prediction, and Mean Average Precision (mAP) with 0.75 Intersection @@ -5293,6 +5378,7 @@ texts: Over Union (IOU) threshold for the bounding-box predictions of table cells. The predicted OTSL structures were converted back to HTML format in - children: [] + content_layer: body label: page_header orig: Optimized Table Tokenization for Table Structure Recognition parent: @@ -5311,6 +5397,7 @@ texts: self_ref: '#/texts/71' text: Optimized Table Tokenization for Table Structure Recognition - children: [] + content_layer: body label: page_header orig: '9' parent: @@ -5329,6 +5416,7 @@ texts: self_ref: '#/texts/72' text: '9' - children: [] + content_layer: body label: text orig: order to compute the TED score. Inference timing results for all experiments were obtained from the same machine on a single core with AMD EPYC 7763 CPU @2.45 @@ -5351,6 +5439,7 @@ texts: were obtained from the same machine on a single core with AMD EPYC 7763 CPU @2.45 GHz. - children: [] + content_layer: body label: section_header level: 1 orig: 5.1 Hyper Parameter Optimization @@ -5370,6 +5459,7 @@ texts: self_ref: '#/texts/74' text: 5.1 Hyper Parameter Optimization - children: [] + content_layer: body label: text orig: We have chosen the PubTabNet data set to perform HPO, since it includes a highly diverse set of tables. Also we report TED scores separately for simple @@ -5398,6 +5488,7 @@ texts: better mAP scores in comparison to HTML. However OTSL yields a 2x speed up in the inference runtime over HTML. - children: [] + content_layer: body label: caption orig: 'Table 1. HPO performed in OTSL and HTML representation on the same transformer-based TableFormer [9] architecture, trained only on PubTabNet [22]. Effects of reducing @@ -5424,6 +5515,7 @@ texts: trained on OTSL perform better, especially in recognizing complex table structures, and maintain a much higher mAP score than the HTML counterpart.' - children: [] + content_layer: body label: section_header level: 1 orig: 5.2 Quantitative Results @@ -5443,6 +5535,7 @@ texts: self_ref: '#/texts/77' text: 5.2 Quantitative Results - children: [] + content_layer: body label: text orig: 'We picked the model parameter configuration that produced the best prediction quality (enc=6, dec=6, heads=8) with PubTabNet alone, then independently trained @@ -5473,6 +5566,7 @@ texts: outperforms HTML across the board, keeping high TEDs and mAP scores even on difficult financial tables (FinTabNet) that contain sparse and large tables.' - children: [] + content_layer: body label: text orig: Additionally, the results show that OTSL has an advantage over HTML when applied on a bigger data set like PubTables-1M and achieves significantly improved scores. @@ -5497,6 +5591,7 @@ texts: Finally, OTSL achieves faster inference due to fewer decoding steps which is a result of the reduced sequence representation. - children: [] + content_layer: body label: page_header orig: '10' parent: @@ -5515,6 +5610,7 @@ texts: self_ref: '#/texts/80' text: '10' - children: [] + content_layer: body label: page_header orig: M. Lysak, et al. parent: @@ -5533,6 +5629,7 @@ texts: self_ref: '#/texts/81' text: M. Lysak, et al. - children: [] + content_layer: body label: caption orig: Table 2. TSR and cell detection results compared between OTSL and HTML on the PubTabNet [22], FinTabNet [21] and PubTables-1M [14] data sets using Table-Former @@ -5555,6 +5652,7 @@ texts: the PubTabNet [22], FinTabNet [21] and PubTables-1M [14] data sets using Table-Former [9] (with enc=6, dec=6, heads=8). - children: [] + content_layer: body label: section_header level: 1 orig: 5.3 Qualitative Results @@ -5574,6 +5672,7 @@ texts: self_ref: '#/texts/83' text: 5.3 Qualitative Results - children: [] + content_layer: body label: text orig: To illustrate the qualitative differences between OTSL and HTML, Figure 5 demonstrates less overlap and more accurate bounding boxes with OTSL. In Figure @@ -5598,6 +5697,7 @@ texts: 6, OTSL proves to be more effective in handling tables with longer token sequences, resulting in even more precise structure prediction and bounding boxes. - children: [] + content_layer: body label: caption orig: "Fig. 5. The OTSL model produces more accurate bounding boxes with less overlap\ \ (E) than the HTML model (D), when predicting the structure of a sparse table\ @@ -5622,6 +5722,7 @@ texts: \ (A), at twice the inference speed because of shorter sequence length (B),(C).\ \ 'PMC2807444_006_00.png ' PubTabNet. \u03BC" - children: [] + content_layer: body label: text orig: "\u03BC" parent: @@ -5640,6 +5741,7 @@ texts: self_ref: '#/texts/86' text: "\u03BC" - children: [] + content_layer: body label: text orig: "\u2265" parent: @@ -5658,6 +5760,7 @@ texts: self_ref: '#/texts/87' text: "\u2265" - children: [] + content_layer: body label: page_header orig: Optimized Table Tokenization for Table Structure Recognition parent: @@ -5676,6 +5779,7 @@ texts: self_ref: '#/texts/88' text: Optimized Table Tokenization for Table Structure Recognition - children: [] + content_layer: body label: page_header orig: '11' parent: @@ -5694,6 +5798,7 @@ texts: self_ref: '#/texts/89' text: '11' - children: [] + content_layer: body label: caption orig: Fig. 6. Visualization of predicted structure and detected bounding boxes on a complex table with many rows. The OTSL model (B) captured repeating pattern @@ -5720,6 +5825,7 @@ texts: model also didn't complete the HTML sequence correctly and displayed a lot more of drift and overlap of bounding boxes. 'PMC5406406_003_01.png ' PubTabNet. - children: [] + content_layer: body label: page_header orig: 12 M. Lysak, et al. parent: @@ -5738,6 +5844,7 @@ texts: self_ref: '#/texts/91' text: 12 M. Lysak, et al. - children: [] + content_layer: body label: section_header level: 1 orig: 6 Conclusion @@ -5757,6 +5864,7 @@ texts: self_ref: '#/texts/92' text: 6 Conclusion - children: [] + content_layer: body label: text orig: We demonstrated that representing tables in HTML for the task of table structure recognition with Im2Seq models is ill-suited and has serious limitations. Furthermore, @@ -5781,6 +5889,7 @@ texts: we presented in this paper an Optimized Table Structure Language (OTSL) which, when compared to commonly used general purpose languages, has several key benefits. - children: [] + content_layer: body label: text orig: First and foremost, given the same network configuration, inference time for a table-structure prediction is about 2 times faster compared to the conventional @@ -5817,6 +5926,7 @@ texts: in OTSL with prediction quality comparable to models trained on HTML (see Table 1). - children: [] + content_layer: body label: text orig: Secondly, OTSL has more inherent structure and a significantly restricted vocabulary size. This allows autoregressive models to perform better in the TED @@ -5857,6 +5967,7 @@ texts: mistakes. This in return allows to perform structural error detection and correction on-the-fly during sequence generation. - children: [] + content_layer: body label: section_header level: 1 orig: References @@ -5876,6 +5987,7 @@ texts: self_ref: '#/texts/96' text: References - children: [] + content_layer: body label: reference orig: '1. Auer, C., Dolfi, M., Carvalho, A., Ramis, C.B., Staar, P.W.J.: Delivering document conversion as a cloud service with high throughput and responsiveness. @@ -5898,6 +6010,7 @@ texts: document conversion as a cloud service with high throughput and responsiveness. CoRR abs/2206.00785 (2022). https://doi.org/10.48550/arXiv.2206.00785, https://doi.org/10.48550/arXiv.2206.00785' - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -5924,6 +6037,7 @@ texts: \ In: Porwal, U., Forn\xE9s, A., Shafait, F. (eds.) Frontiers in Handwriting Recognition.\ \ pp. 545-561. Springer International Publishing, Cham (2022)" - children: [] + content_layer: body label: reference orig: '3. Chi, Z., Huang, H., Xu, H.D., Yu, H., Yin, W., Mao, X.L.: Complicated table structure recognition. arXiv preprint arXiv:1908.04729 (2019)' @@ -5944,6 +6058,7 @@ texts: text: '3. Chi, Z., Huang, H., Xu, H.D., Yu, H., Yin, W., Mao, X.L.: Complicated table structure recognition. arXiv preprint arXiv:1908.04729 (2019)' - children: [] + content_layer: body label: reference orig: '4. Deng, Y., Rosenberg, D., Mann, G.: Challenges in end-to-end neural scientific table recognition. In: 2019 International Conference on Document Analysis and @@ -5966,6 +6081,7 @@ texts: table recognition. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 894-901. IEEE (2019)' - children: [] + content_layer: body label: page_header orig: Optimized Table Tokenization for Table Structure Recognition parent: @@ -5984,6 +6100,7 @@ texts: self_ref: '#/texts/101' text: Optimized Table Tokenization for Table Structure Recognition - children: [] + content_layer: body label: page_header orig: '13' parent: @@ -6002,6 +6119,7 @@ texts: self_ref: '#/texts/102' text: '13' - children: [] + content_layer: body label: reference orig: '5. Kayal, P., Anand, M., Desai, H., Singh, M.: Tables to latex: structure and content extraction from scientific tables. International Journal on Document @@ -6024,6 +6142,7 @@ texts: and content extraction from scientific tables. International Journal on Document Analysis and Recognition (IJDAR) pp. 1-10 (2022)' - children: [] + content_layer: body label: reference orig: '6. Lee, E., Kwon, J., Yang, H., Park, J., Lee, S., Koo, H.I., Cho, N.I.: Table structure recognition based on grid shape graph. In: 2022 Asia-Pacific Signal @@ -6048,6 +6167,7 @@ texts: and Information Processing Association Annual Summit and Conference (APSIPA ASC). pp. 1868-1873. IEEE (2022)' - children: [] + content_layer: body label: reference orig: '7. Li, M., Cui, L., Huang, S., Wei, F., Zhou, M., Li, Z.: Tablebank: A benchmark dataset for table detection and recognition (2019)' @@ -6068,6 +6188,7 @@ texts: text: '7. Li, M., Cui, L., Huang, S., Wei, F., Zhou, M., Li, Z.: Tablebank: A benchmark dataset for table detection and recognition (2019)' - children: [] + content_layer: body label: reference orig: '8. Livathinos, N., Berrospi, C., Lysak, M., Kuropiatnyk, V., Nassar, A., Carvalho, A., Dolfi, M., Auer, C., Dinkla, K., Staar, P.: Robust pdf document @@ -6094,6 +6215,7 @@ texts: on Artificial Intelligence 35 (17), 15137-15145 (May 2021), https://ojs.aaai.org/index.php/ AAAI/article/view/17777' - children: [] + content_layer: body label: reference orig: '9. Nassar, A., Livathinos, N., Lysak, M., Staar, P.: Tableformer: Table structure understanding with transformers. In: Proceedings of the IEEE/CVF Conference on @@ -6116,6 +6238,7 @@ texts: understanding with transformers. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR). pp. 4614-4623 (June 2022)' - children: [] + content_layer: body label: reference orig: '10. Pfitzmann, B., Auer, C., Dolfi, M., Nassar, A.S., Staar, P.W.J.: Doclaynet: A large human-annotated dataset for document-layout segmentation. In: Zhang, A., @@ -6142,6 +6265,7 @@ texts: and Data Mining, Washington, DC, USA, August 14-18, 2022. pp. 3743-3751. ACM (2022). https://doi.org/10.1145/3534678.3539043, https:// doi.org/10.1145/3534678.3539043' - children: [] + content_layer: body label: reference orig: '11. Prasad, D., Gadpal, A., Kapadni, K., Visave, M., Sultanpure, K.: Cascadetabnet: An approach for end to end table detection and structure recognition from imagebased @@ -6166,6 +6290,7 @@ texts: documents. In: Proceedings of the IEEE/CVF conference on computer vision and pattern recognition workshops. pp. 572-573 (2020)' - children: [] + content_layer: body label: reference orig: '12. Schreiber, S., Agne, S., Wolf, I., Dengel, A., Ahmed, S.: Deepdesrt: Deep learning for detection and structure recognition of tables in document images. @@ -6190,6 +6315,7 @@ texts: In: 2017 14th IAPR international conference on document analysis and recognition (ICDAR). vol. 1, pp. 1162-1167. IEEE (2017)' - children: [] + content_layer: body label: reference orig: '13. Siddiqui, S.A., Fateh, I.A., Rizvi, S.T.R., Dengel, A., Ahmed, S.: Deeptabstr: Deep learning based table structure recognition. In: 2019 International Conference @@ -6212,6 +6338,7 @@ texts: Deep learning based table structure recognition. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 1403-1409 (2019). https:// doi.org/10.1109/ICDAR.2019.00226' - children: [] + content_layer: body label: reference orig: '14. Smock, B., Pesala, R., Abraham, R.: PubTables-1M: Towards comprehensive table extraction from unstructured documents. In: Proceedings of the IEEE/CVF @@ -6236,6 +6363,7 @@ texts: Conference on Computer Vision and Pattern Recognition (CVPR). pp. 4634-4642 (June 2022)' - children: [] + content_layer: body label: reference orig: '15. Staar, P.W.J., Dolfi, M., Auer, C., Bekas, C.: Corpus conversion service: A machine learning platform to ingest documents at scale. In: Proceedings of the @@ -6262,6 +6390,7 @@ texts: pp. 774-782. KDD ''18, Association for Computing Machinery, New York, NY, USA (2018). https://doi.org/10.1145/3219819.3219834, https://doi.org/10. 1145/3219819.3219834' - children: [] + content_layer: body label: reference orig: '16. Wang, X.: Tabular Abstraction, Editing, and Formatting. Ph.D. thesis, CAN (1996), aAINN09397' @@ -6282,6 +6411,7 @@ texts: text: '16. Wang, X.: Tabular Abstraction, Editing, and Formatting. Ph.D. thesis, CAN (1996), aAINN09397' - children: [] + content_layer: body label: reference orig: '17. Xue, W., Li, Q., Tao, D.: Res2tim: Reconstruct syntactic structures from table images. In: 2019 International Conference on Document Analysis and Recognition @@ -6304,6 +6434,7 @@ texts: table images. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 749-755. IEEE (2019)' - children: [] + content_layer: body label: page_header orig: '14' parent: @@ -6322,6 +6453,7 @@ texts: self_ref: '#/texts/116' text: '14' - children: [] + content_layer: body label: page_header orig: M. Lysak, et al. parent: @@ -6340,6 +6472,7 @@ texts: self_ref: '#/texts/117' text: M. Lysak, et al. - children: [] + content_layer: body label: reference orig: '18. Xue, W., Yu, B., Wang, W., Tao, D., Li, Q.: Tgrnet: A table graph reconstruction network for table structure recognition. In: Proceedings of the IEEE/CVF International @@ -6362,6 +6495,7 @@ texts: network for table structure recognition. In: Proceedings of the IEEE/CVF International Conference on Computer Vision. pp. 1295-1304 (2021)' - children: [] + content_layer: body label: reference orig: '19. Ye, J., Qi, X., He, Y., Chen, Y., Gu, D., Gao, P., Xiao, R.: Pingan-vcgroup''s solution for icdar 2021 competition on scientific literature parsing task b: Table @@ -6384,6 +6518,7 @@ texts: solution for icdar 2021 competition on scientific literature parsing task b: Table recognition to html (2021). https://doi.org/10.48550/ARXIV.2105.01848, https://arxiv.org/abs/2105.01848' - children: [] + content_layer: body enumerated: false label: list_item marker: '-' @@ -6406,6 +6541,7 @@ texts: text: '20. Zhang, Z., Zhang, J., Du, J., Wang, F.: Split, embed and merge: An accurate table structure recognizer. Pattern Recognition 126, 108565 (2022)' - children: [] + content_layer: body label: reference orig: '21. Zheng, X., Burdick, D., Popa, L., Zhong, X., Wang, N.X.R.: Global table extractor (gte): A framework for joint table identification and cell structure @@ -6432,6 +6568,7 @@ texts: of Computer Vision (WACV). pp. 697-706 (2021). https://doi.org/10.1109/WACV48630.2021. 00074' - children: [] + content_layer: body label: reference orig: '22. Zhong, X., ShafieiBavani, E., Jimeno Yepes, A.: Image-based table recognition: Data, model, and evaluation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.M. @@ -6456,6 +6593,7 @@ texts: (eds.) Computer Vision-ECCV 2020. pp. 564-580. Springer International Publishing, Cham (2020)' - children: [] + content_layer: body label: reference orig: '23. Zhong, X., Tang, J., Yepes, A.J.: Publaynet: largest dataset ever for document layout analysis. In: 2019 International Conference on Document Analysis From 5d01e618d282d77555942dede4537b69e7dde5b2 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Tue, 4 Feb 2025 11:00:19 +0100 Subject: [PATCH 06/10] Add content_layer in iterate_items Signed-off-by: Christoph Auer --- docling_core/types/doc/document.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index 60ab3f36..065deda8 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -1835,6 +1835,7 @@ def iterate_items( with_groups: bool = False, traverse_pictures: bool = False, page_no: Optional[int] = None, + included_content_layers: List[ContentLayer] = [ContentLayer.BODY], _level: int = 0, # fixed parameter, carries through the node nesting level ) -> typing.Iterable[Tuple[NodeItem, int]]: # tuple of node and level """iterate_elements. @@ -1851,14 +1852,22 @@ def iterate_items( root = self.body # Yield non-group items or group items when with_groups=True - if not isinstance(root, GroupItem) or with_groups: - if isinstance(root, DocItem): - if page_no is None or any( - prov.page_no == page_no for prov in root.prov - ): - yield root, _level - else: - yield root, _level + + # Combine conditions to have a single yield point + should_yield = ( + (not isinstance(root, GroupItem) or with_groups) + and ( + not isinstance(root, DocItem) + or ( + page_no is None + or any(prov.page_no == page_no for prov in root.prov) + ) + ) + and root.content_layer in included_content_layers + ) + + if should_yield: + yield root, _level # Handle picture traversal - only traverse children if requested if isinstance(root, PictureItem) and not traverse_pictures: From ae3748b1f2e526698c1ed136cec7dc0502f28209 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Tue, 4 Feb 2025 13:15:04 +0100 Subject: [PATCH 07/10] Bump format version, add model_validator for old page_header,page_footer in body Signed-off-by: Christoph Auer --- docling_core/types/doc/document.py | 19 ++++++++++++++++++- docs/DoclingDocument.json | 2 +- .../data/doc/constructed_doc.embedded.json.gt | 2 +- .../data/doc/constructed_doc.embedded.yaml.gt | 2 +- .../doc/constructed_doc.referenced.json.gt | 2 +- .../doc/constructed_doc.referenced.yaml.gt | 2 +- 6 files changed, 23 insertions(+), 6 deletions(-) diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index 065deda8..06e59158 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -47,7 +47,7 @@ Uint64 = typing.Annotated[int, Field(ge=0, le=(2**64 - 1))] LevelNumber = typing.Annotated[int, Field(ge=1, le=100)] -CURRENT_VERSION: Final = "1.0.0" +CURRENT_VERSION: Final = "1.1.0" DEFAULT_EXPORT_LABELS = { DocItemLabel.TITLE, @@ -1442,6 +1442,23 @@ class DoclingDocument(BaseModel): pages: Dict[int, PageItem] = {} # empty as default + @model_validator(mode="after") + def transform_to_content_layer(self) -> "DoclingDocument": + """transform_to_content_layer.""" + # Since version 1.1.0, all NodeItems carry content_layer property. + # We must assign previous page_header and page_footer instances to furniture. + if self.version == "1.0.0": + for item, level in self.iterate_items( + with_groups=True, traverse_pictures=True + ): + if isinstance(item, DocItem) and item.label in [ + DocItemLabel.PAGE_HEADER, + DocItemLabel.PAGE_FOOTER, + ]: + item.content_layer = ContentLayer.FURNITURE + + return self + def add_group( self, label: Optional[GroupLabel] = None, diff --git a/docs/DoclingDocument.json b/docs/DoclingDocument.json index 50046ee8..17dbb84b 100644 --- a/docs/DoclingDocument.json +++ b/docs/DoclingDocument.json @@ -1482,7 +1482,7 @@ "type": "string" }, "version": { - "default": "1.0.0", + "default": "1.1.0", "pattern": "^(?P0|[1-9]\\d*)\\.(?P0|[1-9]\\d*)\\.(?P0|[1-9]\\d*)(?:-(?P(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+(?P[0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$", "title": "Version", "type": "string" diff --git a/test/data/doc/constructed_doc.embedded.json.gt b/test/data/doc/constructed_doc.embedded.json.gt index 17e84c90..7ce60c5e 100644 --- a/test/data/doc/constructed_doc.embedded.json.gt +++ b/test/data/doc/constructed_doc.embedded.json.gt @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.0.0", + "version": "1.1.0", "name": "Untitled 1", "furniture": { "self_ref": "#/furniture", diff --git a/test/data/doc/constructed_doc.embedded.yaml.gt b/test/data/doc/constructed_doc.embedded.yaml.gt index 2ad6fdd5..47f9eea2 100644 --- a/test/data/doc/constructed_doc.embedded.yaml.gt +++ b/test/data/doc/constructed_doc.embedded.yaml.gt @@ -425,4 +425,4 @@ texts: prov: [] self_ref: '#/texts/14' text: This is the caption of figure 2. -version: 1.0.0 +version: 1.1.0 diff --git a/test/data/doc/constructed_doc.referenced.json.gt b/test/data/doc/constructed_doc.referenced.json.gt index ec98a42e..5a2f18bd 100644 --- a/test/data/doc/constructed_doc.referenced.json.gt +++ b/test/data/doc/constructed_doc.referenced.json.gt @@ -1,6 +1,6 @@ { "schema_name": "DoclingDocument", - "version": "1.0.0", + "version": "1.1.0", "name": "Untitled 1", "furniture": { "self_ref": "#/furniture", diff --git a/test/data/doc/constructed_doc.referenced.yaml.gt b/test/data/doc/constructed_doc.referenced.yaml.gt index 2afbbbb6..29e9fe0d 100644 --- a/test/data/doc/constructed_doc.referenced.yaml.gt +++ b/test/data/doc/constructed_doc.referenced.yaml.gt @@ -425,4 +425,4 @@ texts: prov: [] self_ref: '#/texts/14' text: This is the caption of figure 2. -version: 1.0.0 +version: 1.1.0 From c32977bf9022f0777ca1ce0fc9ff7520201f76a7 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Wed, 5 Feb 2025 09:35:36 +0100 Subject: [PATCH 08/10] fix: Change to before model_validator Signed-off-by: Christoph Auer --- docling_core/types/doc/document.py | 23 ++- test/data/doc/2206.01062-1.0.0.json | 1 + test/data/doc/2206.01062.yaml.et | 157 +++++++++--------- .../legacy_doc/doc-export.docling.yaml.gt | 2 +- test/test_docling_doc.py | 8 + 5 files changed, 95 insertions(+), 96 deletions(-) create mode 100644 test/data/doc/2206.01062-1.0.0.json diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index 06e59158..ee7a47be 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -1442,22 +1442,21 @@ class DoclingDocument(BaseModel): pages: Dict[int, PageItem] = {} # empty as default - @model_validator(mode="after") - def transform_to_content_layer(self) -> "DoclingDocument": + @model_validator(mode="before") + @classmethod + def transform_to_content_layer(cls, data: dict) -> dict: """transform_to_content_layer.""" # Since version 1.1.0, all NodeItems carry content_layer property. # We must assign previous page_header and page_footer instances to furniture. - if self.version == "1.0.0": - for item, level in self.iterate_items( - with_groups=True, traverse_pictures=True - ): - if isinstance(item, DocItem) and item.label in [ - DocItemLabel.PAGE_HEADER, - DocItemLabel.PAGE_FOOTER, + # Note: model_validators which check on the version must use "before". + if "version" in data and data["version"] == "1.0.0": + for item in data.get("texts", []): + if "label" in item and item["label"] in [ + DocItemLabel.PAGE_HEADER.value, + DocItemLabel.PAGE_FOOTER.value, ]: - item.content_layer = ContentLayer.FURNITURE - - return self + item["content_layer"] = "furniture" + return data def add_group( self, diff --git a/test/data/doc/2206.01062-1.0.0.json b/test/data/doc/2206.01062-1.0.0.json new file mode 100644 index 00000000..44506402 --- /dev/null +++ b/test/data/doc/2206.01062-1.0.0.json @@ -0,0 +1 @@ +{"schema_name": "DoclingDocument", "version": "1.0.0", "name": "2206.01062", "origin": {"mimetype": "application/pdf", "binary_hash": 7156212269791437020, "filename": "2206.01062.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}, {"cref": "#/texts/1"}, {"cref": "#/texts/2"}, {"cref": "#/texts/3"}, {"cref": "#/texts/4"}, {"cref": "#/texts/5"}, {"cref": "#/texts/6"}, {"cref": "#/texts/7"}, {"cref": "#/texts/8"}, {"cref": "#/texts/9"}, {"cref": "#/texts/10"}, {"cref": "#/texts/11"}, {"cref": "#/texts/12"}, {"cref": "#/texts/13"}, {"cref": "#/texts/14"}, {"cref": "#/texts/15"}, {"cref": "#/texts/16"}, {"cref": "#/pictures/0"}, {"cref": "#/texts/346"}, {"cref": "#/texts/347"}, {"cref": "#/texts/348"}, {"cref": "#/texts/349"}, {"cref": "#/texts/350"}, {"cref": "#/texts/351"}, {"cref": "#/texts/352"}, {"cref": "#/texts/353"}, {"cref": "#/texts/354"}, {"cref": "#/groups/0"}, {"cref": "#/texts/359"}, {"cref": "#/texts/360"}, {"cref": "#/groups/1"}, {"cref": "#/texts/362"}, {"cref": "#/texts/363"}, {"cref": "#/texts/364"}, {"cref": "#/texts/365"}, {"cref": "#/texts/366"}, {"cref": "#/texts/367"}, {"cref": "#/texts/368"}, {"cref": "#/texts/369"}, {"cref": "#/texts/370"}, {"cref": "#/texts/371"}, {"cref": "#/texts/372"}, {"cref": "#/pictures/1"}, {"cref": "#/texts/385"}, {"cref": "#/texts/386"}, {"cref": "#/texts/387"}, {"cref": "#/texts/388"}, {"cref": "#/texts/389"}, {"cref": "#/texts/390"}, {"cref": "#/texts/391"}, {"cref": "#/texts/392"}, {"cref": "#/texts/393"}, {"cref": "#/texts/394"}, {"cref": "#/texts/395"}, {"cref": "#/texts/396"}, {"cref": "#/tables/0"}, {"cref": "#/texts/397"}, {"cref": "#/pictures/2"}, {"cref": "#/texts/398"}, {"cref": "#/texts/399"}, {"cref": "#/texts/400"}, {"cref": "#/texts/401"}, {"cref": "#/texts/402"}, {"cref": "#/texts/403"}, {"cref": "#/texts/404"}, {"cref": "#/texts/405"}, {"cref": "#/texts/406"}, {"cref": "#/texts/407"}, {"cref": "#/texts/408"}, {"cref": "#/groups/2"}, {"cref": "#/texts/415"}, {"cref": "#/texts/416"}, {"cref": "#/texts/417"}, {"cref": "#/pictures/3"}, {"cref": "#/texts/428"}, {"cref": "#/texts/429"}, {"cref": "#/texts/430"}, {"cref": "#/texts/431"}, {"cref": "#/texts/432"}, {"cref": "#/tables/1"}, {"cref": "#/texts/433"}, {"cref": "#/texts/434"}, {"cref": "#/texts/435"}, {"cref": "#/texts/436"}, {"cref": "#/pictures/4"}, {"cref": "#/texts/459"}, {"cref": "#/texts/460"}, {"cref": "#/texts/461"}, {"cref": "#/texts/462"}, {"cref": "#/texts/463"}, {"cref": "#/texts/464"}, {"cref": "#/texts/465"}, {"cref": "#/texts/466"}, {"cref": "#/tables/2"}, {"cref": "#/texts/467"}, {"cref": "#/texts/468"}, {"cref": "#/texts/469"}, {"cref": "#/texts/470"}, {"cref": "#/tables/3"}, {"cref": "#/texts/471"}, {"cref": "#/texts/472"}, {"cref": "#/texts/473"}, {"cref": "#/texts/474"}, {"cref": "#/texts/475"}, {"cref": "#/texts/476"}, {"cref": "#/texts/477"}, {"cref": "#/tables/4"}, {"cref": "#/texts/478"}, {"cref": "#/texts/479"}, {"cref": "#/texts/480"}, {"cref": "#/texts/481"}, {"cref": "#/texts/482"}, {"cref": "#/texts/483"}, {"cref": "#/texts/484"}, {"cref": "#/texts/485"}, {"cref": "#/texts/486"}, {"cref": "#/groups/3"}, {"cref": "#/texts/500"}, {"cref": "#/texts/501"}, {"cref": "#/texts/502"}, {"cref": "#/pictures/5"}, {"cref": "#/texts/514"}, {"cref": "#/texts/515"}, {"cref": "#/groups/4"}], "name": "_root_", "label": "unspecified"}, "groups": [{"self_ref": "#/groups/0", "parent": {"cref": "#/body"}, "children": [{"cref": "#/texts/355"}, {"cref": "#/texts/356"}, {"cref": "#/texts/357"}, {"cref": "#/texts/358"}], "name": "list", "label": "list"}, {"self_ref": "#/groups/1", "parent": {"cref": "#/body"}, "children": [{"cref": "#/texts/361"}], "name": "list", "label": "list"}, {"self_ref": "#/groups/2", "parent": {"cref": "#/body"}, "children": [{"cref": "#/texts/409"}, {"cref": "#/texts/410"}, {"cref": "#/texts/411"}, {"cref": "#/texts/412"}, {"cref": "#/texts/413"}, {"cref": "#/texts/414"}], "name": "list", "label": "list"}, {"self_ref": "#/groups/3", "parent": {"cref": "#/body"}, "children": [{"cref": "#/texts/487"}, {"cref": "#/texts/488"}, {"cref": "#/texts/489"}, {"cref": "#/texts/490"}, {"cref": "#/texts/491"}, {"cref": "#/texts/492"}, {"cref": "#/texts/493"}, {"cref": "#/texts/494"}, {"cref": "#/texts/495"}, {"cref": "#/texts/496"}, {"cref": "#/texts/497"}, {"cref": "#/texts/498"}, {"cref": "#/texts/499"}], "name": "list", "label": "list"}, {"self_ref": "#/groups/4", "parent": {"cref": "#/body"}, "children": [{"cref": "#/texts/516"}, {"cref": "#/texts/517"}, {"cref": "#/texts/518"}, {"cref": "#/texts/519"}, {"cref": "#/texts/520"}, {"cref": "#/texts/521"}, {"cref": "#/texts/522"}, {"cref": "#/texts/523"}, {"cref": "#/texts/524"}, {"cref": "#/texts/525"}], "name": "list", "label": "list"}], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "label": "page_header", "prov": [{"page_no": 1, "bbox": {"l": 18.3402099609375, "t": 573.6400146484375, "r": 36.33979415893555, "b": 236.99996948242188, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 37]}], "orig": "arXiv:2206.01062v1 [cs.CV] 2 Jun 2022", "text": "arXiv:2206.01062v1 [cs.CV] 2 Jun 2022"}, {"self_ref": "#/texts/1", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 1, "bbox": {"l": 107.30000305175781, "t": 708.3052978515625, "r": 505.06195068359375, "b": 672.4044189453125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 71]}], "orig": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis", "text": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis", "level": 1}, {"self_ref": "#/texts/2", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 90.96701049804688, "t": 658.32763671875, "r": 193.73123168945312, "b": 611.7597045898438, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 73]}], "orig": "Birgit Pfitzmann IBM Research Rueschlikon, Switzerland bpf@zurich.ibm.com", "text": "Birgit Pfitzmann IBM Research Rueschlikon, Switzerland bpf@zurich.ibm.com"}, {"self_ref": "#/texts/3", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 255.11602783203125, "t": 658.32763671875, "r": 357.8802490234375, "b": 611.7597045898438, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 71]}], "orig": "Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com", "text": "Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com"}, {"self_ref": "#/texts/4", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 419.2650451660156, "t": 658.32763671875, "r": 522.029296875, "b": 611.7597045898438, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 70]}], "orig": "Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com", "text": "Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com"}, {"self_ref": "#/texts/5", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 172.54302978515625, "t": 599.942626953125, "r": 275.3072509765625, "b": 553.3746948242188, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 72]}], "orig": "Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com", "text": "Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com"}, {"self_ref": "#/texts/6", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 336.6930236816406, "t": 599.942626953125, "r": 439.457275390625, "b": 553.3746948242188, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 68]}], "orig": "Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com", "text": "Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com"}, {"self_ref": "#/texts/7", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 1, "bbox": {"l": 53.79803466796875, "t": 544.297119140625, "r": 111.94354248046875, "b": 533.9879760742188, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 8]}], "orig": "ABSTRACT", "text": "ABSTRACT", "level": 1}, {"self_ref": "#/texts/8", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 53.46699905395508, "t": 529.095458984375, "r": 295.5601806640625, "b": 257.7068176269531, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1595]}], "orig": "Accurate document layout analysis is a key requirement for highquality PDF document conversion. With the recent availability of public, large ground-truth datasets such as PubLayNet and DocBank, deep-learning models have proven to be very effective at layout detection and segmentation. While these datasets are of adequate size to train such models, they severely lack in layout variability since they are sourced from scientific article repositories such as PubMed and arXiv only. Consequently, the accuracy of the layout segmentation drops significantly when these models are applied on more challenging and diverse layouts. In this paper, we present DocLayNet , a new, publicly available, document-layout annotation dataset in COCO format. It contains 80863 manually annotated pages from diverse data sources to represent a wide variability in layouts. For each PDF page, the layout annotations provide labelled bounding-boxes with a choice of 11 distinct classes. DocLayNet also provides a subset of double- and triple-annotated pages to determine the inter-annotator agreement. In multiple experiments, we provide baseline accuracy scores (in mAP) for a set of popular object detection models. We also demonstrate that these models fall approximately 10% behind the inter-annotator agreement. Furthermore, we provide evidence that DocLayNet is of sufficient size. Lastly, we compare models trained on PubLayNet, DocBank and DocLayNet, showing that layout predictions of the DocLayNettrained models are more robust and thus the preferred choice for general-purpose document-layout analysis.", "text": "Accurate document layout analysis is a key requirement for highquality PDF document conversion. With the recent availability of public, large ground-truth datasets such as PubLayNet and DocBank, deep-learning models have proven to be very effective at layout detection and segmentation. While these datasets are of adequate size to train such models, they severely lack in layout variability since they are sourced from scientific article repositories such as PubMed and arXiv only. Consequently, the accuracy of the layout segmentation drops significantly when these models are applied on more challenging and diverse layouts. In this paper, we present DocLayNet , a new, publicly available, document-layout annotation dataset in COCO format. It contains 80863 manually annotated pages from diverse data sources to represent a wide variability in layouts. For each PDF page, the layout annotations provide labelled bounding-boxes with a choice of 11 distinct classes. DocLayNet also provides a subset of double- and triple-annotated pages to determine the inter-annotator agreement. In multiple experiments, we provide baseline accuracy scores (in mAP) for a set of popular object detection models. We also demonstrate that these models fall approximately 10% behind the inter-annotator agreement. Furthermore, we provide evidence that DocLayNet is of sufficient size. Lastly, we compare models trained on PubLayNet, DocBank and DocLayNet, showing that layout predictions of the DocLayNettrained models are more robust and thus the preferred choice for general-purpose document-layout analysis."}, {"self_ref": "#/texts/9", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 1, "bbox": {"l": 53.79800033569336, "t": 241.00308227539062, "r": 134.81988525390625, "b": 230.69398498535156, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 12]}], "orig": "CCS CONCEPTS", "text": "CCS CONCEPTS", "level": 1}, {"self_ref": "#/texts/10", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 53.79798889160156, "t": 225.91700744628906, "r": 297.8529357910156, "b": 195.4988555908203, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 170]}], "orig": "\u00b7 Information systems \u2192 Document structure ; \u00b7 Applied computing \u2192 Document analysis ; \u00b7 Computing methodologies \u2192 Machine learning ; Computer vision ; Object detection ;", "text": "\u00b7 Information systems \u2192 Document structure ; \u00b7 Applied computing \u2192 Document analysis ; \u00b7 Computing methodologies \u2192 Machine learning ; Computer vision ; Object detection ;"}, {"self_ref": "#/texts/11", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 53.79800033569336, "t": 157.60162353515625, "r": 295.11798095703125, "b": 119.2081069946289, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 397]}], "orig": "Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s).", "text": "Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s)."}, {"self_ref": "#/texts/12", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 53.79800033569336, "t": 116.91976928710938, "r": 197.8627471923828, "b": 110.43414306640625, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 48]}], "orig": "KDD '22, August 14-18, 2022, Washington, DC, USA", "text": "KDD '22, August 14-18, 2022, Washington, DC, USA"}, {"self_ref": "#/texts/13", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 53.31700134277344, "t": 108.18763732910156, "r": 186.74652099609375, "b": 101.67411041259766, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 45]}], "orig": "\u00a9 2022 Copyright held by the owner/author(s).", "text": "\u00a9 2022 Copyright held by the owner/author(s)."}, {"self_ref": "#/texts/14", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 53.55400085449219, "t": 100.21663665771484, "r": 157.03125, "b": 93.70310974121094, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 33]}], "orig": "ACM ISBN 978-1-4503-9385-0/22/08.", "text": "ACM ISBN 978-1-4503-9385-0/22/08."}, {"self_ref": "#/texts/15", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 53.79800033569336, "t": 92.24663543701172, "r": 166.94093322753906, "b": 85.73310852050781, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 39]}], "orig": "https://doi.org/10.1145/3534678.3539043", "text": "https://doi.org/10.1145/3534678.3539043"}, {"self_ref": "#/texts/16", "parent": {"cref": "#/body"}, "children": [], "label": "caption", "prov": [{"page_no": 1, "bbox": {"l": 317.9549865722656, "t": 251.91700744628906, "r": 559.8057861328125, "b": 232.48475646972656, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 84]}], "orig": "Figure 1: Four examples of complex page layouts across different document categories", "text": "Figure 1: Four examples of complex page layouts across different document categories"}, {"self_ref": "#/texts/17", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 327.86951, "t": 440.21915, "r": 330.41248, "b": 438.04535, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "13", "text": "13"}, {"self_ref": "#/texts/18", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 327.83005, "t": 460.42731000000003, "r": 351.16092, "b": 458.68829, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 25]}], "orig": "USING THE VERTICAL TUBE -", "text": "USING THE VERTICAL TUBE -"}, {"self_ref": "#/texts/19", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 327.83005, "t": 458.81708, "r": 348.30536, "b": 457.07806, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 20]}], "orig": "MODELS AY11230/11234", "text": "MODELS AY11230/11234"}, {"self_ref": "#/texts/20", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 327.83005, "t": 455.59561, "r": 329.05914, "b": 454.07394, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "1.", "text": "1."}, {"self_ref": "#/texts/21", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.67368, "t": 455.59561, "r": 349.95349, "b": 454.07394, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 33]}], "orig": "The vertical tube can be used for", "text": "The vertical tube can be used for"}, {"self_ref": "#/texts/22", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.11752, "t": 454.16412, "r": 353.57977, "b": 452.64248999999995, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 38]}], "orig": "instructional viewing or to photograph", "text": "instructional viewing or to photograph"}, {"self_ref": "#/texts/23", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 327.77121, "t": 452.73264, "r": 352.4306, "b": 451.211, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 36]}], "orig": "the image with a digital camera or a", "text": "the image with a digital camera or a"}, {"self_ref": "#/texts/24", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 328.15176, "t": 451.30118, "r": 337.91086, "b": 449.77951, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 13]}], "orig": "micro TV unit", "text": "micro TV unit"}, {"self_ref": "#/texts/25", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 327.8313, "t": 449.80956999999995, "r": 329.09155, "b": 448.28793, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "2.", "text": "2."}, {"self_ref": "#/texts/26", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.72168, "t": 449.80956999999995, "r": 354.9267, "b": 448.28793, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 39]}], "orig": "Loosen the retention screw, then rotate", "text": "Loosen the retention screw, then rotate"}, {"self_ref": "#/texts/27", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 327.8313, "t": 448.37808, "r": 351.66949, "b": 446.85645, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 33]}], "orig": "the adjustment ring to change the", "text": "the adjustment ring to change the"}, {"self_ref": "#/texts/28", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 328.21185, "t": 446.94662, "r": 346.33179, "b": 445.42496, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 28]}], "orig": "length of the vertical tube.", "text": "length of the vertical tube."}, {"self_ref": "#/texts/29", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 327.83005, "t": 445.15319999999997, "r": 329.12726, "b": 443.63153, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "3.", "text": "3."}, {"self_ref": "#/texts/30", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.77588, "t": 445.15319999999997, "r": 351.18005, "b": 443.63153, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 33]}], "orig": "Make sure that both the images in", "text": "Make sure that both the images in"}, {"self_ref": "#/texts/31", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 327.25311, "t": 537.05188, "r": 350.07861, "b": 533.13904, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 9]}], "orig": "OPERATION", "text": "OPERATION"}, {"self_ref": "#/texts/32", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 350.07861, "t": 537.23218, "r": 351.82651, "b": 533.31934, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "(", "text": "("}, {"self_ref": "#/texts/33", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 351.82651, "t": 537.05188, "r": 360.85242, "b": 533.13904, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 5]}], "orig": "cont.", "text": "cont."}, {"self_ref": "#/texts/34", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 360.85242, "t": 537.23218, "r": 362.60028, "b": 533.31934, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": ")", "text": ")"}, {"self_ref": "#/texts/35", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88037, "t": 528.50507, "r": 345.84351, "b": 526.76605, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 19]}], "orig": "SELECTING OBJECTIVE", "text": "SELECTING OBJECTIVE"}, {"self_ref": "#/texts/36", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88037, "t": 526.89484, "r": 340.54153, "b": 525.15582, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 13]}], "orig": "MAGNIFICATION", "text": "MAGNIFICATION"}, {"self_ref": "#/texts/37", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88037, "t": 525.28467, "r": 328.31903, "b": 523.54559, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "1.", "text": "1."}, {"self_ref": "#/texts/38", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.03836, "t": 525.28467, "r": 354.21472, "b": 523.54559, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 35]}], "orig": "There are two objectives. The lower", "text": "There are two objectives. The lower"}, {"self_ref": "#/texts/39", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88037, "t": 523.67444, "r": 355.19193, "b": 521.93542, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 37]}], "orig": "magnification objective has a greater", "text": "magnification objective has a greater"}, {"self_ref": "#/texts/40", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88037, "t": 522.06421, "r": 345.80057, "b": 520.3252, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 24]}], "orig": "depth of field and view.", "text": "depth of field and view."}, {"self_ref": "#/texts/41", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88037, "t": 520.45398, "r": 328.33862, "b": 518.71497, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "2.", "text": "2."}, {"self_ref": "#/texts/42", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.06775, "t": 520.45398, "r": 352.39969, "b": 518.71497, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 32]}], "orig": "In order to observe the specimen", "text": "In order to observe the specimen"}, {"self_ref": "#/texts/43", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88037, "t": 518.84381, "r": 352.90042, "b": 517.10474, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 34]}], "orig": "easily use the lower magnification", "text": "easily use the lower magnification"}, {"self_ref": "#/texts/44", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88037, "t": 517.23358, "r": 354.59546, "b": 515.49457, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 38]}], "orig": "objective first. Then, by rotating the", "text": "objective first. Then, by rotating the"}, {"self_ref": "#/texts/45", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88037, "t": 515.62335, "r": 350.81885, "b": 513.88434, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 30]}], "orig": "case, the magnification can be", "text": "case, the magnification can be"}, {"self_ref": "#/texts/46", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88037, "t": 514.01312, "r": 335.46707, "b": 512.27411, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 8]}], "orig": "changed.", "text": "changed."}, {"self_ref": "#/texts/47", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88037, "t": 510.79272, "r": 354.57755, "b": 509.05368, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 27]}], "orig": "CHANGING THE INTERPUPILLARY", "text": "CHANGING THE INTERPUPILLARY"}, {"self_ref": "#/texts/48", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88037, "t": 509.18249999999995, "r": 335.1752, "b": 507.44348, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 8]}], "orig": "DISTANCE", "text": "DISTANCE"}, {"self_ref": "#/texts/49", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88037, "t": 507.5723, "r": 328.34784, "b": 505.83325, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "1.", "text": "1."}, {"self_ref": "#/texts/50", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.08157, "t": 507.5723, "r": 354.76245, "b": 505.83325, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 35]}], "orig": "The distance between the observer's", "text": "The distance between the observer's"}, {"self_ref": "#/texts/51", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88037, "t": 505.96207, "r": 354.6499, "b": 504.22305, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 38]}], "orig": "pupils is the interpupillary distance.", "text": "pupils is the interpupillary distance."}, {"self_ref": "#/texts/52", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88037, "t": 504.35187, "r": 328.25125, "b": 502.61282, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "2.", "text": "2."}, {"self_ref": "#/texts/53", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 328.93671, "t": 504.35187, "r": 354.29825, "b": 502.61282, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 37]}], "orig": "To adjust the interpupillary distance", "text": "To adjust the interpupillary distance"}, {"self_ref": "#/texts/54", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88181, "t": 502.74164, "r": 355.02075, "b": 501.00262, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 37]}], "orig": "rotate the prism caps until both eyes", "text": "rotate the prism caps until both eyes"}, {"self_ref": "#/texts/55", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88181, "t": 501.13144000000005, "r": 350.82028, "b": 499.3924, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 30]}], "orig": "coincide with the image in the", "text": "coincide with the image in the"}, {"self_ref": "#/texts/56", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88181, "t": 499.52121, "r": 336.2067, "b": 497.7822, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 9]}], "orig": "eyepiece.", "text": "eyepiece."}, {"self_ref": "#/texts/57", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88181, "t": 496.30078, "r": 335.3941, "b": 494.56177, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 8]}], "orig": "FOCUSING", "text": "FOCUSING"}, {"self_ref": "#/texts/58", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88181, "t": 494.69058, "r": 328.34314, "b": 492.95154, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "1.", "text": "1."}, {"self_ref": "#/texts/59", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.07379, "t": 494.69058, "r": 353.18555, "b": 492.95154, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 33]}], "orig": "Remove the lens protective cover.", "text": "Remove the lens protective cover."}, {"self_ref": "#/texts/60", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88324, "t": 493.08035, "r": 328.35919, "b": 491.34134, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "2.", "text": "2."}, {"self_ref": "#/texts/61", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.0972, "t": 493.08035, "r": 353.45065, "b": 491.34134, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 33]}], "orig": "Place the specimen on the working", "text": "Place the specimen on the working"}, {"self_ref": "#/texts/62", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88324, "t": 491.47015, "r": 333.32825, "b": 489.73110999999994, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 6]}], "orig": "stage.", "text": "stage."}, {"self_ref": "#/texts/63", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88324, "t": 489.85991999999993, "r": 328.31296, "b": 488.1209099999999, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "3.", "text": "3."}, {"self_ref": "#/texts/64", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.02783, "t": 489.85991999999993, "r": 354.76303, "b": 488.1209099999999, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 36]}], "orig": "Focus the specimen with the left eye", "text": "Focus the specimen with the left eye"}, {"self_ref": "#/texts/65", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88324, "t": 488.24973, "r": 355.96307, "b": 486.51068, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 40]}], "orig": "first while turning the focus knob until", "text": "first while turning the focus knob until"}, {"self_ref": "#/texts/66", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88324, "t": 486.6395, "r": 354.46594, "b": 484.90047999999996, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 34]}], "orig": "the image appears clear and sharp.", "text": "the image appears clear and sharp."}, {"self_ref": "#/texts/67", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88324, "t": 485.0293, "r": 328.25488, "b": 483.29025, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "4.", "text": "4."}, {"self_ref": "#/texts/68", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 328.9407, "t": 485.0293, "r": 356.37335, "b": 483.29025, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 40]}], "orig": "Rotate the right eyepiece ring until the", "text": "Rotate the right eyepiece ring until the"}, {"self_ref": "#/texts/69", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88324, "t": 483.41907, "r": 355.38867, "b": 481.68005, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 36]}], "orig": "images in each eyepiece coincide and", "text": "images in each eyepiece coincide and"}, {"self_ref": "#/texts/70", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88324, "t": 481.80887, "r": 343.17249, "b": 480.06982, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 20]}], "orig": "are sharp and clear.", "text": "are sharp and clear."}, {"self_ref": "#/texts/71", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88324, "t": 478.58844, "r": 344.13388, "b": 476.84940000000006, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 17]}], "orig": "CHANGING THE BULB", "text": "CHANGING THE BULB"}, {"self_ref": "#/texts/72", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88324, "t": 476.97821000000005, "r": 328.37418, "b": 475.23920000000004, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "1.", "text": "1."}, {"self_ref": "#/texts/73", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.11963, "t": 476.97821000000005, "r": 348.50162, "b": 475.23920000000004, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 26]}], "orig": "Disconnect the power cord.", "text": "Disconnect the power cord."}, {"self_ref": "#/texts/74", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88324, "t": 475.36801, "r": 328.34061, "b": 473.62897, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "2.", "text": "2."}, {"self_ref": "#/texts/75", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.06931, "t": 475.36801, "r": 353.11588, "b": 473.62897, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 33]}], "orig": "When the bulb is cool, remove the", "text": "When the bulb is cool, remove the"}, {"self_ref": "#/texts/76", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88464, "t": 473.7577800000001, "r": 353.79517, "b": 472.0187700000001, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 34]}], "orig": "oblique illuminator cap and remove", "text": "oblique illuminator cap and remove"}, {"self_ref": "#/texts/77", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88464, "t": 472.14757999999995, "r": 348.02094, "b": 470.40854, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 26]}], "orig": "the halogen bulb with cap.", "text": "the halogen bulb with cap."}, {"self_ref": "#/texts/78", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88464, "t": 470.53735, "r": 328.37512, "b": 468.79834, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "3.", "text": "3."}, {"self_ref": "#/texts/79", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.12036, "t": 470.53735, "r": 352.96808, "b": 468.79834, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 32]}], "orig": "Replace with a new halogen bulb.", "text": "Replace with a new halogen bulb."}, {"self_ref": "#/texts/80", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88608, "t": 468.92715, "r": 328.36884, "b": 467.18811, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "4.", "text": "4."}, {"self_ref": "#/texts/81", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.1102, "t": 468.92715, "r": 356.5412, "b": 467.18811, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 37]}], "orig": "Open the window in the base plate and", "text": "Open the window in the base plate and"}, {"self_ref": "#/texts/82", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88608, "t": 467.31692999999996, "r": 350.13828, "b": 465.57791, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 27]}], "orig": "replace the halogen lamp or", "text": "replace the halogen lamp or"}, {"self_ref": "#/texts/83", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88608, "t": 465.70673, "r": 351.59677, "b": 463.96768, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 31]}], "orig": "fluorescent lamp of transmitted", "text": "fluorescent lamp of transmitted"}, {"self_ref": "#/texts/84", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.88608, "t": 464.0965, "r": 336.89197, "b": 462.35748, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 12]}], "orig": "illuminator.", "text": "illuminator."}, {"self_ref": "#/texts/85", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42023, "t": 528.50507, "r": 366.93256, "b": 526.76605, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 8]}], "orig": "FOCUSING", "text": "FOCUSING"}, {"self_ref": "#/texts/86", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42023, "t": 526.89484, "r": 359.89841, "b": 525.15582, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "1.", "text": "1."}, {"self_ref": "#/texts/87", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 360.63751, "t": 526.89484, "r": 387.98407, "b": 525.15582, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 37]}], "orig": "Turn the focusing knob away or toward", "text": "Turn the focusing knob away or toward"}, {"self_ref": "#/texts/88", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42023, "t": 525.28467, "r": 384.58948, "b": 523.54559, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 34]}], "orig": "you until a clear image is viewed.", "text": "you until a clear image is viewed."}, {"self_ref": "#/texts/89", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42166, "t": 523.67444, "r": 359.78549, "b": 521.93542, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "2.", "text": "2."}, {"self_ref": "#/texts/90", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 360.46741, "t": 523.67444, "r": 384.33441, "b": 521.93542, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 35]}], "orig": "If the image is unclear, adjust the", "text": "If the image is unclear, adjust the"}, {"self_ref": "#/texts/91", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.4231, "t": 522.06421, "r": 384.61502, "b": 520.3252, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 34]}], "orig": "height of the elevator up or down,", "text": "height of the elevator up or down,"}, {"self_ref": "#/texts/92", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.4231, "t": 520.45398, "r": 385.38922, "b": 518.71497, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 34]}], "orig": "then turn the focusing knob again.", "text": "then turn the focusing knob again."}, {"self_ref": "#/texts/93", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.4231, "t": 517.23358, "r": 377.35046, "b": 515.49457, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 18]}], "orig": "ZOOM MAGNIFICATION", "text": "ZOOM MAGNIFICATION"}, {"self_ref": "#/texts/94", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.4231, "t": 515.62335, "r": 359.89429, "b": 513.88434, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "1.", "text": "1."}, {"self_ref": "#/texts/95", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 360.62988, "t": 515.62335, "r": 386.37589, "b": 513.88434, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 35]}], "orig": "Turn the zoom magnification knob to", "text": "Turn the zoom magnification knob to"}, {"self_ref": "#/texts/96", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.4231, "t": 514.01312, "r": 386.78732, "b": 512.27411, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 38]}], "orig": "the desired magnification and field of", "text": "the desired magnification and field of"}, {"self_ref": "#/texts/97", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.4231, "t": 512.40295, "r": 364.16855, "b": 510.66391, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 5]}], "orig": "view.", "text": "view."}, {"self_ref": "#/texts/98", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.4231, "t": 510.79272, "r": 359.86777, "b": 509.05368, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "2.", "text": "2."}, {"self_ref": "#/texts/99", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 360.59012, "t": 510.79272, "r": 387.31656, "b": 509.05368, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 37]}], "orig": "In most situations, it is recommended", "text": "In most situations, it is recommended"}, {"self_ref": "#/texts/100", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.4231, "t": 509.18249999999995, "r": 381.56656, "b": 507.44348, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 28]}], "orig": "that you focus at the lowest", "text": "that you focus at the lowest"}, {"self_ref": "#/texts/101", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.4231, "t": 507.5723, "r": 386.63403, "b": 505.83325, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 36]}], "orig": "magnification, then move to a higher", "text": "magnification, then move to a higher"}, {"self_ref": "#/texts/102", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42453, "t": 505.96207, "r": 382.77115, "b": 504.22305, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 29]}], "orig": "magnification and re-focus as", "text": "magnification and re-focus as"}, {"self_ref": "#/texts/103", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42453, "t": 504.35187, "r": 367.98694, "b": 502.61282, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 10]}], "orig": "necessary.", "text": "necessary."}, {"self_ref": "#/texts/104", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42453, "t": 502.74164, "r": 359.80386, "b": 501.00262, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "3.", "text": "3."}, {"self_ref": "#/texts/105", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 360.49353, "t": 502.74164, "r": 386.70093, "b": 501.00262, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 38]}], "orig": "If the image is not clear to both eyes", "text": "If the image is not clear to both eyes"}, {"self_ref": "#/texts/106", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42453, "t": 501.13144000000005, "r": 388.03534, "b": 499.3924, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 38]}], "orig": "at the same time, the diopter ring may", "text": "at the same time, the diopter ring may"}, {"self_ref": "#/texts/107", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42453, "t": 499.52121, "r": 373.13724, "b": 497.7822, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 16]}], "orig": "need adjustment.", "text": "need adjustment."}, {"self_ref": "#/texts/108", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42453, "t": 496.30078, "r": 381.74539, "b": 494.56177, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 23]}], "orig": "DIOPTER RING ADJUSTMENT", "text": "DIOPTER RING ADJUSTMENT"}, {"self_ref": "#/texts/109", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42453, "t": 494.69058, "r": 359.83682, "b": 492.95154, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "1.", "text": "1."}, {"self_ref": "#/texts/110", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 360.54297, "t": 494.69058, "r": 388.08289, "b": 492.95154, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 39]}], "orig": "To adjust the eyepiece for viewing with", "text": "To adjust the eyepiece for viewing with"}, {"self_ref": "#/texts/111", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42453, "t": 493.08035, "r": 382.73251, "b": 491.34134, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 29]}], "orig": "or without eyeglasses and for", "text": "or without eyeglasses and for"}, {"self_ref": "#/texts/112", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42453, "t": 491.47015, "r": 387.72266, "b": 489.73110999999994, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 39]}], "orig": "differences in acuity between the right", "text": "differences in acuity between the right"}, {"self_ref": "#/texts/113", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42453, "t": 489.85991999999993, "r": 384.1991, "b": 488.1209099999999, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 35]}], "orig": "and left eyes, follow the following", "text": "and left eyes, follow the following"}, {"self_ref": "#/texts/114", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42453, "t": 488.24973, "r": 364.88672, "b": 486.51068, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 6]}], "orig": "steps:", "text": "steps:"}, {"self_ref": "#/texts/115", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42453, "t": 486.6395, "r": 359.95078, "b": 484.90047999999996, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "a.", "text": "a."}, {"self_ref": "#/texts/116", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 361.47699, "t": 486.6395, "r": 386.65988, "b": 484.90047999999996, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 33]}], "orig": "Observe an image through the left", "text": "Observe an image through the left"}, {"self_ref": "#/texts/117", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42453, "t": 485.0293, "r": 386.7634, "b": 483.29025, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 35]}], "orig": "eyepiece and bring a specific point", "text": "eyepiece and bring a specific point"}, {"self_ref": "#/texts/118", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42453, "t": 483.41907, "r": 385.41354, "b": 481.68005, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 32]}], "orig": "into focus using the focus knob.", "text": "into focus using the focus knob."}, {"self_ref": "#/texts/119", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42453, "t": 481.80887, "r": 359.93304, "b": 480.06982, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "b.", "text": "b."}, {"self_ref": "#/texts/120", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 361.44156, "t": 481.80887, "r": 382.56085, "b": 480.06982, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 27]}], "orig": "By turning the diopter ring", "text": "By turning the diopter ring"}, {"self_ref": "#/texts/121", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42596, "t": 480.19864, "r": 385.4559, "b": 478.45963, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 33]}], "orig": "adjustment for the left eyepiece,", "text": "adjustment for the left eyepiece,"}, {"self_ref": "#/texts/122", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42596, "t": 478.58844, "r": 384.56122, "b": 476.84940000000006, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 31]}], "orig": "bring the same point into sharp", "text": "bring the same point into sharp"}, {"self_ref": "#/texts/123", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42596, "t": 476.97821000000005, "r": 366.74371, "b": 475.23920000000004, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 6]}], "orig": "focus.", "text": "focus."}, {"self_ref": "#/texts/124", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42596, "t": 475.36801, "r": 383.93884, "b": 473.62897, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 32]}], "orig": "c.Then bring the same point into", "text": "c.Then bring the same point into"}, {"self_ref": "#/texts/125", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42596, "t": 473.7577800000001, "r": 385.69241, "b": 472.0187700000001, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 32]}], "orig": "focus through the right eyepiece", "text": "focus through the right eyepiece"}, {"self_ref": "#/texts/126", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42596, "t": 472.14757999999995, "r": 385.94861, "b": 470.40854, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 34]}], "orig": "by turning the right diopter ring.", "text": "by turning the right diopter ring."}, {"self_ref": "#/texts/127", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42596, "t": 470.53735, "r": 385.54236, "b": 468.79834, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 33]}], "orig": "d.With more than one viewer, each", "text": "d.With more than one viewer, each"}, {"self_ref": "#/texts/128", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42596, "t": 468.92715, "r": 382.98718, "b": 467.18811, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 28]}], "orig": "viewer should note their own", "text": "viewer should note their own"}, {"self_ref": "#/texts/129", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42596, "t": 467.31692999999996, "r": 385.06448, "b": 465.57791, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 34]}], "orig": "diopter ring position for the left", "text": "diopter ring position for the left"}, {"self_ref": "#/texts/130", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42596, "t": 465.70673, "r": 385.20682, "b": 463.96768, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 32]}], "orig": "and right eyepieces, then before", "text": "and right eyepieces, then before"}, {"self_ref": "#/texts/131", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42596, "t": 464.0965, "r": 382.21964, "b": 462.35748, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 28]}], "orig": "viewing set the diopter ring", "text": "viewing set the diopter ring"}, {"self_ref": "#/texts/132", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42596, "t": 462.4863, "r": 382.63382, "b": 460.74725, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 28]}], "orig": "adjustments to that setting.", "text": "adjustments to that setting."}, {"self_ref": "#/texts/133", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42596, "t": 459.26587000000006, "r": 375.67661, "b": 457.52682000000004, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 17]}], "orig": "CHANGING THE BULB", "text": "CHANGING THE BULB"}, {"self_ref": "#/texts/134", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42596, "t": 457.65564, "r": 359.90311, "b": 455.91663, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "1.", "text": "1."}, {"self_ref": "#/texts/135", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 360.64169, "t": 457.65564, "r": 385.75333, "b": 455.91663, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 34]}], "orig": "Disconnect the power cord from the", "text": "Disconnect the power cord from the"}, {"self_ref": "#/texts/136", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42596, "t": 456.04544, "r": 372.01416, "b": 454.3064, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 18]}], "orig": "electrical outlet.", "text": "electrical outlet."}, {"self_ref": "#/texts/137", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42596, "t": 454.43521, "r": 359.88327, "b": 452.6962, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "2.", "text": "2."}, {"self_ref": "#/texts/138", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 360.61191, "t": 454.43521, "r": 384.65726, "b": 452.6962, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 33]}], "orig": "When the bulb is cool, remove the", "text": "When the bulb is cool, remove the"}, {"self_ref": "#/texts/139", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42596, "t": 452.82501, "r": 385.33649, "b": 451.0859699999999, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 34]}], "orig": "oblique illuminator cap and remove", "text": "oblique illuminator cap and remove"}, {"self_ref": "#/texts/140", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42596, "t": 451.21478, "r": 379.57224, "b": 449.47577, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 26]}], "orig": "the halogen bulb with cap.", "text": "the halogen bulb with cap."}, {"self_ref": "#/texts/141", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.4274, "t": 449.60458, "r": 359.91788, "b": 447.86553999999995, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "3.", "text": "3."}, {"self_ref": "#/texts/142", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 360.66312, "t": 449.60458, "r": 384.5108, "b": 447.86553999999995, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 32]}], "orig": "Replace with a new halogen bulb.", "text": "Replace with a new halogen bulb."}, {"self_ref": "#/texts/143", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42883, "t": 447.99434999999994, "r": 359.92792, "b": 446.25534, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "4.", "text": "4."}, {"self_ref": "#/texts/144", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 360.67746, "t": 447.99434999999994, "r": 385.41235, "b": 446.25534, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 33]}], "orig": "Open the window in the base plate", "text": "Open the window in the base plate"}, {"self_ref": "#/texts/145", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42883, "t": 446.38416, "r": 383.2782, "b": 444.64511, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 31]}], "orig": "and replace the halogen lamp or", "text": "and replace the halogen lamp or"}, {"self_ref": "#/texts/146", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42883, "t": 444.77393, "r": 383.13953, "b": 443.03491, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 31]}], "orig": "fluorescent lamp of transmitted", "text": "fluorescent lamp of transmitted"}, {"self_ref": "#/texts/147", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.42883, "t": 443.16373, "r": 368.43472, "b": 441.42468, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 12]}], "orig": "illuminator.", "text": "illuminator."}, {"self_ref": "#/texts/148", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 326.59567, "t": 530.85815, "r": 339.11377, "b": 529.11908, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 13]}], "orig": "Model AY11230", "text": "Model AY11230"}, {"self_ref": "#/texts/149", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 358.48605, "t": 530.85815, "r": 371.00415, "b": 529.11908, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 13]}], "orig": "Model AY11234", "text": "Model AY11234"}, {"self_ref": "#/texts/150", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 455.43533, "t": 440.22961000000004, "r": 457.97827000000007, "b": 438.05585, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "14", "text": "14"}, {"self_ref": "#/texts/151", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 408.24518, "t": 516.47327, "r": 414.4234, "b": 515.03979, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 10]}], "orig": "Objectives", "text": "Objectives"}, {"self_ref": "#/texts/152", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 409.39554, "t": 523.01764, "r": 419.06677, "b": 521.58417, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 16]}], "orig": "Revolving Turret", "text": "Revolving Turret"}, {"self_ref": "#/texts/153", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 441.3895, "t": 512.87372, "r": 445.87192, "b": 511.44025, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 6]}], "orig": "Coarse", "text": "Coarse"}, {"self_ref": "#/texts/154", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 441.3895, "t": 511.69391, "r": 448.22338999999994, "b": 510.2604099999999, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 10]}], "orig": "Adjustment", "text": "Adjustment"}, {"self_ref": "#/texts/155", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 441.3895, "t": 510.51407, "r": 444.40371999999996, "b": 509.08060000000006, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 4]}], "orig": "Knob", "text": "Knob"}, {"self_ref": "#/texts/156", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 398.79288, "t": 537.05353, "r": 428.91568, "b": 533.14069, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 13]}], "orig": "MODEL AY11236", "text": "MODEL AY11236"}, {"self_ref": "#/texts/157", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 398.32535, "t": 486.95709, "r": 435.93542, "b": 483.04427999999996, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 16]}], "orig": "MICROSCOPE USAGE", "text": "MICROSCOPE USAGE"}, {"self_ref": "#/texts/158", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 398.08594, "t": 481.64108, "r": 453.72171, "b": 479.46729, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 55]}], "orig": "BARSKA Model AY11236 is a powerful fixed power compound", "text": "BARSKA Model AY11236 is a powerful fixed power compound"}, {"self_ref": "#/texts/159", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 398.08594, "t": 479.49414, "r": 453.09939999999995, "b": 477.32034, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 59]}], "orig": "microscope designed for biological studies such as specimen", "text": "microscope designed for biological studies such as specimen"}, {"self_ref": "#/texts/160", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 398.08594, "t": 477.3472, "r": 456.65246999999994, "b": 475.1734, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 59]}], "orig": "examination. It can also be used for examining bacteria and", "text": "examination. It can also be used for examining bacteria and"}, {"self_ref": "#/texts/161", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 398.08594, "t": 475.20023, "r": 456.73859000000004, "b": 473.02646, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 67]}], "orig": "for general clinical and medical studies and other scientific uses.", "text": "for general clinical and medical studies and other scientific uses."}, {"self_ref": "#/texts/162", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 398.62399, "t": 471.57059, "r": 427.77472, "b": 467.65777999999995, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 12]}], "orig": "CONSTRUCTION", "text": "CONSTRUCTION"}, {"self_ref": "#/texts/163", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 398.08594, "t": 465.53930999999994, "r": 456.02639999999997, "b": 463.36551, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 58]}], "orig": "BARSKA Model AY11236 is a fixed power compound microscope.", "text": "BARSKA Model AY11236 is a fixed power compound microscope."}, {"self_ref": "#/texts/164", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 398.08414, "t": 463.3923300000001, "r": 455.42238999999995, "b": 461.2185400000001, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 65]}], "orig": "It is constructed with two optical paths at the same angle. It is", "text": "It is constructed with two optical paths at the same angle. It is"}, {"self_ref": "#/texts/165", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 398.08414, "t": 461.24539, "r": 457.39844, "b": 459.07159, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 65]}], "orig": "equipped with transmitted illumination. By using this instrument,", "text": "equipped with transmitted illumination. By using this instrument,"}, {"self_ref": "#/texts/166", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 398.08414, "t": 459.09845, "r": 453.97745, "b": 456.92464999999993, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 59]}], "orig": "the user can observe specimens at magnification from 40x to", "text": "the user can observe specimens at magnification from 40x to"}, {"self_ref": "#/texts/167", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 398.08414, "t": 456.95148, "r": 454.70708999999994, "b": 454.77768, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 62]}], "orig": "1000x by selecting the desired objective lens. Coarse and fine", "text": "1000x by selecting the desired objective lens. Coarse and fine"}, {"self_ref": "#/texts/168", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 398.08414, "t": 454.80453, "r": 458.90240000000006, "b": 452.63074, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 65]}], "orig": "focus adjustments provide accuracy and image detail. The rotating", "text": "focus adjustments provide accuracy and image detail. The rotating"}, {"self_ref": "#/texts/169", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 398.08594, "t": 452.65759, "r": 453.0672, "b": 450.4838, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 58]}], "orig": "head allows the user to position the eyepieces for maximum", "text": "head allows the user to position the eyepieces for maximum"}, {"self_ref": "#/texts/170", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 398.08594, "t": 450.51062, "r": 449.63113, "b": 448.33682, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 56]}], "orig": "viewing comfort and easy access to all adjustment knobs.", "text": "viewing comfort and easy access to all adjustment knobs."}, {"self_ref": "#/texts/171", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 422.10626, "t": 490.75809, "r": 434.62433000000004, "b": 489.01904, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 13]}], "orig": "Model AY11236", "text": "Model AY11236"}, {"self_ref": "#/texts/172", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 442.01610999999997, "t": 508.91351, "r": 444.8817399999999, "b": 507.48004, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 4]}], "orig": "Fine", "text": "Fine"}, {"self_ref": "#/texts/173", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 442.01610999999997, "t": 507.7337, "r": 448.85001, "b": 506.30019999999996, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 10]}], "orig": "Adjustment", "text": "Adjustment"}, {"self_ref": "#/texts/174", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 442.01610999999997, "t": 506.55389, "r": 445.03033000000005, "b": 505.12039, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 4]}], "orig": "Knob", "text": "Knob"}, {"self_ref": "#/texts/175", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 408.00577, "t": 512.87421, "r": 411.42212, "b": 511.4407, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 5]}], "orig": "Stage", "text": "Stage"}, {"self_ref": "#/texts/176", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 404.07172, "t": 511.0855700000001, "r": 410.77707, "b": 509.6521, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 9]}], "orig": "Condenser", "text": "Condenser"}, {"self_ref": "#/texts/177", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 404.07172, "t": 509.90576, "r": 409.2157, "b": 508.47226, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 8]}], "orig": "Focusing", "text": "Focusing"}, {"self_ref": "#/texts/178", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 404.07172, "t": 508.72592, "r": 407.08594, "b": 507.2924499999999, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 4]}], "orig": "Knob", "text": "Knob"}, {"self_ref": "#/texts/179", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 441.81281, "t": 529.67822, "r": 447.03702, "b": 528.24475, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 8]}], "orig": "Eyepiece", "text": "Eyepiece"}, {"self_ref": "#/texts/180", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 437.34607, "t": 520.86975, "r": 440.80496, "b": 519.43719, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 5]}], "orig": "Stand", "text": "Stand"}, {"self_ref": "#/texts/181", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 409.7164, "t": 507.59973, "r": 413.3768, "b": 506.16718, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 4]}], "orig": "Lamp", "text": "Lamp"}, {"self_ref": "#/texts/182", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 409.7164, "t": 506.16837, "r": 413.68201, "b": 504.73584, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 6]}], "orig": "On/Off", "text": "On/Off"}, {"self_ref": "#/texts/183", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 409.7164, "t": 504.737, "r": 413.6337, "b": 503.30447, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 6]}], "orig": "Switch", "text": "Switch"}, {"self_ref": "#/texts/184", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 434.8712499999999, "t": 495.2847, "r": 438.53164999999996, "b": 493.85217, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 4]}], "orig": "Lamp", "text": "Lamp"}, {"self_ref": "#/texts/185", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 439.52039, "t": 499.81692999999996, "r": 443.08768, "b": 498.38439999999997, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 5]}], "orig": "Power", "text": "Power"}, {"self_ref": "#/texts/186", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 439.52039, "t": 498.38556, "r": 442.29575, "b": 496.95303, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 4]}], "orig": "Cord", "text": "Cord"}, {"self_ref": "#/texts/187", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 413.55829, "t": 527.33911, "r": 421.94913, "b": 525.90656, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 13]}], "orig": "Rotating Head", "text": "Rotating Head"}, {"self_ref": "#/texts/188", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 441.84316999999993, "t": 505.09427, "r": 447.87585000000007, "b": 503.66174, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 10]}], "orig": "Stage Clip", "text": "Stage Clip"}, {"self_ref": "#/texts/189", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 441.84316999999993, "t": 503.6629, "r": 448.67252, "b": 502.23037999999997, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 10]}], "orig": "Adjustment", "text": "Adjustment"}, {"self_ref": "#/texts/190", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 407.2403, "t": 532.13354, "r": 425.79089, "b": 530.70105, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 31]}], "orig": "Interpupillary Slide Adjustment", "text": "Interpupillary Slide Adjustment"}, {"self_ref": "#/texts/191", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 413.33698, "r": 466.08835000000005, "b": 411.21588, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 17]}], "orig": "Circling Minimums", "text": "Circling Minimums"}, {"self_ref": "#/texts/192", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 408.7796000000001, "r": 449.64444, "b": 406.65851000000004, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "7", "text": "7"}, {"self_ref": "#/texts/193", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 450.18811, "t": 408.7796000000001, "r": 550.77124, "b": 406.65851000000004, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 184]}], "orig": "K H U H Z D V D F K D Q J H W R W K H 7 ( 5 3 6 F U L W H U L D L Q W K D W D \u1087H F W V F L U F O L Q J D U H D G L P H Q V L R Q E \\ H [ S D Q G L Q J W K H D U H D V W R S U R Y L G H", "text": "K H U H Z D V D F K D Q J H W R W K H 7 ( 5 3 6 F U L W H U L D L Q W K D W D \u1087H F W V F L U F O L Q J D U H D G L P H Q V L R Q E \\ H [ S D Q G L Q J W K H D U H D V W R S U R Y L G H"}, {"self_ref": "#/texts/194", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 406.24268, "r": 536.14716, "b": 404.12158, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 104]}], "orig": "improved obstacle protection. To indicate that the new criteria had been applied to a given procedure, a", "text": "improved obstacle protection. To indicate that the new criteria had been applied to a given procedure, a"}, {"self_ref": "#/texts/195", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 538.31085, "t": 406.24268, "r": 549.49921, "b": 404.12158, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 12]}], "orig": "is placed on", "text": "is placed on"}, {"self_ref": "#/texts/196", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 403.96399, "r": 547.58185, "b": 401.8429, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 119]}], "orig": "the circling line of minimums. The new circling tables and explanatory information is located in the Legend of the TPP.", "text": "the circling line of minimums. The new circling tables and explanatory information is located in the Legend of the TPP."}, {"self_ref": "#/texts/197", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 398.7871999999999, "r": 449.6163, "b": 396.66614, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "7", "text": "7"}, {"self_ref": "#/texts/198", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 450.1319, "t": 398.7871999999999, "r": 529.53082, "b": 396.66614, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 153]}], "orig": "K H D S S U R D F K H V X V L Q J V W D Q G D U G F L U F O L Q J D S S U R D F K D U H D V F D Q E H L G H Q W L \u00bf H G E \\ W K H D E V H Q F H R I W K H", "text": "K H D S S U R D F K H V X V L Q J V W D Q G D U G F L U F O L Q J D S S U R D F K D U H D V F D Q E H L G H Q W L \u00bf H G E \\ W K H D E V H Q F H R I W K H"}, {"self_ref": "#/texts/199", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 532.05829, "t": 398.7871999999999, "r": 550.42261, "b": 396.66614, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 23]}], "orig": "on the circling line of", "text": "on the circling line of"}, {"self_ref": "#/texts/200", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 396.50851, "r": 455.74692, "b": 394.38745, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 7]}], "orig": "minima.", "text": "minima."}, {"self_ref": "#/texts/201", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.95525999999995, "t": 376.40451, "r": 496.2829, "b": 374.49554, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 101]}], "orig": "$ S S O \\ 6 W D Q G D U G & L U F O L Q J $ S S U R D F K 0 D Q H X Y H U L Q J 5 D G L X V 7 D E O H", "text": "$ S S O \\ 6 W D Q G D U G & L U F O L Q J $ S S U R D F K 0 D Q H X Y H U L Q J 5 D G L X V 7 D E O H"}, {"self_ref": "#/texts/202", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 501.13077, "t": 382.74457, "r": 551.16101, "b": 380.8356, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 107]}], "orig": "$ S S O \\ ( [ S D Q G H G & L U F O L Q J $ S S U R D F K 0 D Q H X Y H U L Q J $ L U V S D F H 5 D G L X V", "text": "$ S S O \\ ( [ S D Q G H G & L U F O L Q J $ S S U R D F K 0 D Q H X Y H U L Q J $ L U V S D F H 5 D G L X V"}, {"self_ref": "#/texts/203", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 501.13077, "t": 380.69376, "r": 505.2477999999999, "b": 378.78479, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 5]}], "orig": "Table", "text": "Table"}, {"self_ref": "#/texts/204", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 371.81198, "r": 469.35599, "b": 369.26669, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 14]}], "orig": "AIRPORT SKETCH", "text": "AIRPORT SKETCH"}, {"self_ref": "#/texts/205", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 366.91092, "r": 525.93616, "b": 364.78983, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 92]}], "orig": "The airport sketch is a depiction of the airport with emphasis on runway pattern and related", "text": "The airport sketch is a depiction of the airport with emphasis on runway pattern and related"}, {"self_ref": "#/texts/206", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 364.6322, "r": 522.0343, "b": 362.51114, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 94]}], "orig": "information, positioned in either the lower left or lower right corner of the chart to aid pi-", "text": "information, positioned in either the lower left or lower right corner of the chart to aid pi-"}, {"self_ref": "#/texts/207", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 362.35352, "r": 524.67151, "b": 360.23245, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 92]}], "orig": "lot recognition of the airport from the air and to provide some information to aid on ground", "text": "lot recognition of the airport from the air and to provide some information to aid on ground"}, {"self_ref": "#/texts/208", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 360.07485999999994, "r": 527.172, "b": 357.95377, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 92]}], "orig": "navigation of the airport. The runways are drawn to scale and oriented to true north. Runway", "text": "navigation of the airport. The runways are drawn to scale and oriented to true north. Runway"}, {"self_ref": "#/texts/209", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 357.79617, "r": 502.39545, "b": 355.67508, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 63]}], "orig": "dimensions (length and width) are shown for all active runways.", "text": "dimensions (length and width) are shown for all active runways."}, {"self_ref": "#/texts/210", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 353.2388000000001, "r": 512.92676, "b": 351.11771000000005, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 73]}], "orig": "Runway(s) are depicted based on what type and construction of the runway.", "text": "Runway(s) are depicted based on what type and construction of the runway."}, {"self_ref": "#/texts/211", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.95525999999995, "t": 347.92999, "r": 460.02307, "b": 346.02099999999996, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 12]}], "orig": "Hard Surface", "text": "Hard Surface"}, {"self_ref": "#/texts/212", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 464.89963, "t": 347.92999, "r": 473.98819, "b": 346.02099999999996, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 10]}], "orig": "Other Than", "text": "Other Than"}, {"self_ref": "#/texts/213", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 464.89963, "t": 345.87915, "r": 474.96744, "b": 343.97021, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 12]}], "orig": "Hard Surface", "text": "Hard Surface"}, {"self_ref": "#/texts/214", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 478.91357, "t": 347.92999, "r": 489.45648, "b": 346.02099999999996, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 13]}], "orig": "Metal Surface", "text": "Metal Surface"}, {"self_ref": "#/texts/215", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 493.06420999999995, "t": 347.92999, "r": 505.03076, "b": 346.02099999999996, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 13]}], "orig": "Closed Runway", "text": "Closed Runway"}, {"self_ref": "#/texts/216", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 509.5809, "t": 347.92999, "r": 524.30237, "b": 346.02099999999996, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 18]}], "orig": "Under Construction", "text": "Under Construction"}, {"self_ref": "#/texts/217", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.95525999999995, "t": 337.18793, "r": 458.31406, "b": 335.27896, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 9]}], "orig": "Stopways,", "text": "Stopways,"}, {"self_ref": "#/texts/218", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.95525999999995, "t": 335.13712, "r": 461.92083999999994, "b": 333.22814999999997, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 15]}], "orig": "Taxiways, Park-", "text": "Taxiways, Park-"}, {"self_ref": "#/texts/219", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.95525999999995, "t": 333.08627, "r": 457.08014, "b": 331.17731000000003, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 9]}], "orig": "ing Areas", "text": "ing Areas"}, {"self_ref": "#/texts/220", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 464.89963, "t": 337.18793, "r": 472.87732, "b": 335.27896, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 9]}], "orig": "Displaced", "text": "Displaced"}, {"self_ref": "#/texts/221", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 464.89963, "t": 335.13712, "r": 472.49792, "b": 333.22814999999997, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 9]}], "orig": "Threshold", "text": "Threshold"}, {"self_ref": "#/texts/222", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 478.91357, "t": 337.18793, "r": 483.61584, "b": 335.27896, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 6]}], "orig": "Closed", "text": "Closed"}, {"self_ref": "#/texts/223", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 478.91357, "t": 335.13712, "r": 486.60754000000003, "b": 333.22814999999997, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 8]}], "orig": "Pavement", "text": "Pavement"}, {"self_ref": "#/texts/224", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 493.06420999999995, "t": 337.18793, "r": 504.20648, "b": 335.27896, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 12]}], "orig": "Water Runway", "text": "Water Runway"}, {"self_ref": "#/texts/225", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 322.67026, "r": 548.59674, "b": 320.54919, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 110]}], "orig": "Taxiways and aprons are shaded grey. Other runway features that may be shown are runway numbers, runway dimen-", "text": "Taxiways and aprons are shaded grey. Other runway features that may be shown are runway numbers, runway dimen-"}, {"self_ref": "#/texts/226", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 320.39157, "r": 500.08181999999994, "b": 318.27051, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 61]}], "orig": "sions, runway slope, arresting gear, and displaced threshold.", "text": "sions, runway slope, arresting gear, and displaced threshold."}, {"self_ref": "#/texts/227", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 315.83423, "r": 449.59933000000007, "b": 313.71313, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "2", "text": "2"}, {"self_ref": "#/texts/228", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 450.09796, "t": 315.83423, "r": 547.82562, "b": 313.71313, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 195]}], "orig": "W K H U L Q I R U P D W L R Q F R Q F H U Q L Q J O L J K W L Q J \u00bf Q D O D S S U R D F K E H D U L Q J V D L U S R U W E H D F R Q R E V W D F O H V F R Q W U R O W R Z H U 1 $ 9 $ , ' V K H O L", "text": "W K H U L Q I R U P D W L R Q F R Q F H U Q L Q J O L J K W L Q J \u00bf Q D O D S S U R D F K E H D U L Q J V D L U S R U W E H D F R Q R E V W D F O H V F R Q W U R O W R Z H U 1 $ 9 $ , ' V K H O L"}, {"self_ref": "#/texts/229", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 547.82623, "t": 315.83423, "r": 548.45862, "b": 313.71313, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "-", "text": "-"}, {"self_ref": "#/texts/230", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 313.55554, "r": 470.52609000000007, "b": 311.43445, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 23]}], "orig": "pads may also be shown.", "text": "pads may also be shown."}, {"self_ref": "#/texts/231", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 308.99817, "r": 493.37906000000004, "b": 306.87708, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 81]}], "orig": "$ L U S R U W ( O H Y D W L R Q D Q G 7 R X F K G R Z Q = R Q H ( O H Y D W L R Q", "text": "$ L U S R U W ( O H Y D W L R Q D Q G 7 R X F K G R Z Q = R Q H ( O H Y D W L R Q"}, {"self_ref": "#/texts/232", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 449.10074000000003, "t": 304.4408, "r": 551.80023, "b": 295.48364, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 496]}], "orig": "The airport elevation is shown enclosed within a box in the upper left corner of the sketch box and the touchdown zone elevation (TDZE) is shown in the upper right corner of the sketch box. The airport elevation is the highest point of an D L U S R U W \u00b6 V X V D E O H U X Q Z D \\ V P H D V X U H G L Q I H H W I U R P P H D Q V H D O H Y H O 7 K H 7 ' = ( L V W K H K L J K H V W H O H Y D W L R Q L Q W K H \u00bf U V W I H H W R I the landing surface. Circling only approaches will not show a TDZE.", "text": "The airport elevation is shown enclosed within a box in the upper left corner of the sketch box and the touchdown zone elevation (TDZE) is shown in the upper right corner of the sketch box. The airport elevation is the highest point of an D L U S R U W \u00b6 V X V D E O H U X Q Z D \\ V P H D V X U H G L Q I H H W I U R P P H D Q V H D O H Y H O 7 K H 7 ' = ( L V W K H K L J K H V W H O H Y D W L R Q L Q W K H \u00bf U V W I H H W R I the landing surface. Circling only approaches will not show a TDZE."}, {"self_ref": "#/texts/233", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 498.80661000000003, "t": 276.05629999999996, "r": 502.08792, "b": 272.98235999999997, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 3]}], "orig": "114", "text": "114"}, {"self_ref": "#/texts/234", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 444.56319999999994, "t": 369.15131, "r": 446.25998, "b": 320.12872, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 70]}], "orig": "FAA Chart Users\u2019 Guide - Terminal Procedures Publication (TPP) - Terms", "text": "FAA Chart Users\u2019 Guide - Terminal Procedures Publication (TPP) - Terms"}, {"self_ref": "#/texts/235", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.40536, "t": 412.62463, "r": 355.13138, "b": 409.86664, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 27]}], "orig": "AGL 2013 Financial Calendar", "text": "AGL 2013 Financial Calendar"}, {"self_ref": "#/texts/236", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.40536, "t": 409.69727, "r": 330.96848, "b": 407.44073, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "22", "text": "22"}, {"self_ref": "#/texts/237", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 331.75003, "t": 409.69727, "r": 341.12875, "b": 407.44073, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 11]}], "orig": "August 2012", "text": "August 2012"}, {"self_ref": "#/texts/238", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 350.4722, "t": 409.69727, "r": 384.81079, "b": 407.44073, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 51]}], "orig": "2012 full year result and fi nal dividend announced", "text": "2012 full year result and fi nal dividend announced"}, {"self_ref": "#/texts/239", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.40536, "t": 407.15448, "r": 330.97336, "b": 404.89795, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "30", "text": "30"}, {"self_ref": "#/texts/240", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 331.75735, "t": 407.15448, "r": 341.16534, "b": 404.89795, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 11]}], "orig": "August 2012", "text": "August 2012"}, {"self_ref": "#/texts/241", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 350.4722, "t": 407.15448, "r": 372.90613, "b": 404.89795, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 29]}], "orig": "Ex-dividend trading commences", "text": "Ex-dividend trading commences"}, {"self_ref": "#/texts/242", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.40536, "t": 404.61172, "r": 330.20337, "b": 402.35516000000007, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "5", "text": "5"}, {"self_ref": "#/texts/243", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 331.00137, "t": 404.61172, "r": 342.9715, "b": 402.35516000000007, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 14]}], "orig": "September 2012", "text": "September 2012"}, {"self_ref": "#/texts/244", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 350.4722, "t": 404.61172, "r": 374.88693, "b": 402.35516000000007, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 36]}], "orig": "Record date for 2012 fi nal dividend", "text": "Record date for 2012 fi nal dividend"}, {"self_ref": "#/texts/245", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.40536, "t": 402.06897, "r": 331.0173, "b": 399.81238, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "27", "text": "27"}, {"self_ref": "#/texts/246", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 331.82327, "t": 402.06897, "r": 343.91284, "b": 399.81238, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 14]}], "orig": "September 2012", "text": "September 2012"}, {"self_ref": "#/texts/247", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 350.4722, "t": 402.06897, "r": 365.65988, "b": 399.81238, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 22]}], "orig": "Final dividend payable", "text": "Final dividend payable"}, {"self_ref": "#/texts/248", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.40536, "t": 399.52618, "r": 330.98804, "b": 397.26962000000003, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "23", "text": "23"}, {"self_ref": "#/texts/249", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 331.77936, "t": 399.52618, "r": 342.06674, "b": 397.26962000000003, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 12]}], "orig": "October 2012", "text": "October 2012"}, {"self_ref": "#/texts/250", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 350.4722, "t": 399.52618, "r": 367.22156, "b": 397.26962000000003, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 22]}], "orig": "Annual General Meeting", "text": "Annual General Meeting"}, {"self_ref": "#/texts/251", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.40536, "t": 396.9834, "r": 330.99741, "b": 394.72687, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "27", "text": "27"}, {"self_ref": "#/texts/252", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 331.7934, "t": 396.9834, "r": 342.1416, "b": 394.72687, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 13]}], "orig": "February 2013", "text": "February 2013"}, {"self_ref": "#/texts/253", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 342.64841, "t": 396.81702, "r": 342.65811, "b": 395.50142999999997, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "1", "text": "1"}, {"self_ref": "#/texts/254", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 350.47177, "t": 396.98526, "r": 386.25897, "b": 394.7287, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 50]}], "orig": "2013 interim result and interim dividend announced", "text": "2013 interim result and interim dividend announced"}, {"self_ref": "#/texts/255", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.40491, "t": 394.44250000000005, "r": 331.02695, "b": 392.18594, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "28", "text": "28"}, {"self_ref": "#/texts/256", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 331.83795, "t": 394.44250000000005, "r": 340.75909, "b": 392.18594, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 11]}], "orig": "August 2013", "text": "August 2013"}, {"self_ref": "#/texts/257", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 341.26437, "t": 394.2746, "r": 341.27408, "b": 392.95905, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "1", "text": "1"}, {"self_ref": "#/texts/258", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 350.47144, "t": 394.44287, "r": 385.93265, "b": 392.18631, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 52]}], "orig": "2013 full year results and fi nal dividend announced", "text": "2013 full year results and fi nal dividend announced"}, {"self_ref": "#/texts/259", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.40536, "t": 391.53845, "r": 329.87708, "b": 390.03412, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "1", "text": "1"}, {"self_ref": "#/texts/260", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 330.34882, "t": 391.53845, "r": 358.65204, "b": 390.03412, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 60]}], "orig": "Indicative dates only, subject to change/Board confi rmation", "text": "Indicative dates only, subject to change/Board confi rmation"}, {"self_ref": "#/texts/261", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.40536, "t": 387.65497, "r": 391.771, "b": 385.39844, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 87]}], "orig": "AGL\u2019s Annual General Meeting will be held at the City Recital Hall, Angel Place, Sydney", "text": "AGL\u2019s Annual General Meeting will be held at the City Recital Hall, Angel Place, Sydney"}, {"self_ref": "#/texts/262", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.40536, "t": 385.62143, "r": 369.65308, "b": 383.36486999999994, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 49]}], "orig": "commencing at 10.30am on Tuesday 23 October 2012.", "text": "commencing at 10.30am on Tuesday 23 October 2012."}, {"self_ref": "#/texts/263", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 363.54486, "t": 331.46945000000005, "r": 379.25955, "b": 326.45493, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 13]}], "orig": "Ye s te rd ay", "text": "Ye s te rd ay"}, {"self_ref": "#/texts/264", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 363.54486, "t": 325.2843, "r": 391.38229, "b": 323.02777, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 39]}], "orig": "Established in Sydney in 1837, and then", "text": "Established in Sydney in 1837, and then"}, {"self_ref": "#/texts/265", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 363.54486, "t": 323.25076, "r": 395.01788, "b": 320.99423, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 42]}], "orig": "known as The Australian Gas Light Company,", "text": "known as The Australian Gas Light Company,"}, {"self_ref": "#/texts/266", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 363.54486, "t": 321.21719, "r": 394.08322, "b": 318.96066, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 43]}], "orig": "the AGL business has an established history", "text": "the AGL business has an established history"}, {"self_ref": "#/texts/267", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 363.54486, "t": 319.18365, "r": 390.60727, "b": 316.92712, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 38]}], "orig": "and reputation for serving the gas and", "text": "and reputation for serving the gas and"}, {"self_ref": "#/texts/268", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 363.54486, "t": 317.15012, "r": 393.49612, "b": 314.89355, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 43]}], "orig": "electricity needs of Australian households.", "text": "electricity needs of Australian households."}, {"self_ref": "#/texts/269", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 363.54486, "t": 315.11655, "r": 394.11481, "b": 312.86002, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 43]}], "orig": "In 1841, when AGL supplied the gas to light", "text": "In 1841, when AGL supplied the gas to light"}, {"self_ref": "#/texts/270", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 363.54486, "t": 313.08301, "r": 393.75891, "b": 310.82648, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 46]}], "orig": "the fi rst public street lamp, it was reported", "text": "the fi rst public street lamp, it was reported"}, {"self_ref": "#/texts/271", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 363.54486, "t": 311.04947, "r": 390.4975, "b": 308.79291, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 37]}], "orig": "in the Sydney Gazette as a \u201cwonderful", "text": "in the Sydney Gazette as a \u201cwonderful"}, {"self_ref": "#/texts/272", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 363.54486, "t": 309.0159, "r": 395.70975, "b": 306.75937, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 46]}], "orig": "achievement of scientifi c knowledge, assisted", "text": "achievement of scientifi c knowledge, assisted"}, {"self_ref": "#/texts/273", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 363.54486, "t": 306.98236, "r": 394.27283, "b": 304.7258, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 43]}], "orig": "by mechanical ingenuity.\u201d Within two years,", "text": "by mechanical ingenuity.\u201d Within two years,"}, {"self_ref": "#/texts/274", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 363.54486, "t": 304.94879, "r": 396.65939, "b": 302.69226, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 47]}], "orig": "165 gas lamps were lighting the City of Sydney.", "text": "165 gas lamps were lighting the City of Sydney."}, {"self_ref": "#/texts/275", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.4054, "t": 372.06876, "r": 384.19696, "b": 360.90588, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 15]}], "orig": "Looking back on", "text": "Looking back on"}, {"self_ref": "#/texts/276", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.4054, "t": 361.89621, "r": 372.16626, "b": 350.73331, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 12]}], "orig": "175 years of", "text": "175 years of"}, {"self_ref": "#/texts/277", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.4054, "t": 351.72363000000007, "r": 385.3981, "b": 340.56076, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 16]}], "orig": "looking forward.", "text": "looking forward."}, {"self_ref": "#/texts/278", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 329.40536, "t": 419.83841, "r": 353.36179, "b": 418.08331, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 37]}], "orig": "AGL Energy Limited ABN 74 115 061 375", "text": "AGL Energy Limited ABN 74 115 061 375"}, {"self_ref": "#/texts/279", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 546.20587, "t": 431.09552, "r": 548.23407, "b": 429.17758, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "29", "text": "29"}, {"self_ref": "#/texts/280", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 497.77728, "t": 540.56616, "r": 542.8255, "b": 537.05615, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 32]}], "orig": "signs, signals and road markings", "text": "signs, signals and road markings"}, {"self_ref": "#/texts/281", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 490.30679, "t": 540.52521, "r": 492.09982, "b": 537.0152, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "3", "text": "3"}, {"self_ref": "#/texts/282", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 498.15335, "t": 528.11078, "r": 500.05637, "b": 526.07281, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "In", "text": "In"}, {"self_ref": "#/texts/283", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 500.05637, "t": 528.14282, "r": 524.37036, "b": 526.1369, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 31]}], "orig": "chapter 2, you and your vehicle", "text": "chapter 2, you and your vehicle"}, {"self_ref": "#/texts/284", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 524.37036, "t": 528.11078, "r": 539.89124, "b": 526.07281, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 19]}], "orig": ", you learned about", "text": ", you learned about"}, {"self_ref": "#/texts/285", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 498.15335, "t": 526.06775, "r": 544.50403, "b": 524.02979, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 61]}], "orig": "some of the controls in your vehicle. This chapter is a handy", "text": "some of the controls in your vehicle. This chapter is a handy"}, {"self_ref": "#/texts/286", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 498.15335, "t": 524.02466, "r": 544.01343, "b": 521.98669, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 56]}], "orig": "reference section that gives examples of the most common", "text": "reference section that gives examples of the most common"}, {"self_ref": "#/texts/287", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 498.15335, "t": 521.98169, "r": 544.11987, "b": 519.94366, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 61]}], "orig": "signs, signals and road markings that keep traffi c organized", "text": "signs, signals and road markings that keep traffi c organized"}, {"self_ref": "#/texts/288", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 498.15335, "t": 519.9386, "r": 515.41071, "b": 517.90063, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 21]}], "orig": "and flowing smoothly.", "text": "and flowing smoothly."}, {"self_ref": "#/texts/289", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 498.15335, "t": 514.65381, "r": 505.64642000000003, "b": 511.0643, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 5]}], "orig": "Signs", "text": "Signs"}, {"self_ref": "#/texts/290", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 498.15335, "t": 510.17813, "r": 543.92957, "b": 508.14017, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 62]}], "orig": "There are three ways to read signs: by their shape, colour and", "text": "There are three ways to read signs: by their shape, colour and"}, {"self_ref": "#/texts/291", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 498.15335, "t": 508.1351, "r": 545.67834, "b": 506.09711, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 60]}], "orig": "the messages printed on them. Understanding these three ways", "text": "the messages printed on them. Understanding these three ways"}, {"self_ref": "#/texts/292", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 498.15335, "t": 506.09204, "r": 545.26471, "b": 504.05408, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 66]}], "orig": "of classifying signs will help you figure out the meaning of signs", "text": "of classifying signs will help you figure out the meaning of signs"}, {"self_ref": "#/texts/293", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 498.15335, "t": 504.04901, "r": 513.31335, "b": 502.01105, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 20]}], "orig": "that are new to you.", "text": "that are new to you."}, {"self_ref": "#/texts/294", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 505.43439, "t": 488.92404, "r": 508.53033000000005, "b": 487.10361, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 4]}], "orig": "Stop", "text": "Stop"}, {"self_ref": "#/texts/295", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 527.45502, "t": 488.74646, "r": 541.44678, "b": 486.92603, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 22]}], "orig": "Yield the right-of-way", "text": "Yield the right-of-way"}, {"self_ref": "#/texts/296", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 501.79385, "t": 470.81027, "r": 510.41632, "b": 468.98984, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 13]}], "orig": "Shows driving", "text": "Shows driving"}, {"self_ref": "#/texts/297", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 501.79385, "t": 469.12268000000006, "r": 509.04268999999994, "b": 467.30224999999996, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 11]}], "orig": "regulations", "text": "regulations"}, {"self_ref": "#/texts/298", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 518.66455, "t": 472.40854, "r": 529.80902, "b": 470.58809999999994, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 17]}], "orig": "Explains lane use", "text": "Explains lane use"}, {"self_ref": "#/texts/299", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 534.87561, "t": 473.62384, "r": 546.95142, "b": 471.80341, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 17]}], "orig": "School zone signs", "text": "School zone signs"}, {"self_ref": "#/texts/300", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 534.87561, "t": 471.9362499999999, "r": 545.05762, "b": 470.11581, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 16]}], "orig": "are fl uorescent", "text": "are fl uorescent"}, {"self_ref": "#/texts/301", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 534.87561, "t": 470.24866, "r": 543.32263, "b": 468.42822, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 12]}], "orig": "yellow-green", "text": "yellow-green"}, {"self_ref": "#/texts/302", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 499.21862999999996, "t": 453.87228, "r": 512.62451, "b": 452.05185, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 20]}], "orig": "Tells about motorist", "text": "Tells about motorist"}, {"self_ref": "#/texts/303", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 499.21862999999996, "t": 452.18468999999993, "r": 504.39917, "b": 450.36426, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 8]}], "orig": "services", "text": "services"}, {"self_ref": "#/texts/304", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 516.97748, "t": 453.93961, "r": 529.77484, "b": 452.11917000000005, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 17]}], "orig": "Shows a permitted", "text": "Shows a permitted"}, {"self_ref": "#/texts/305", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 516.97748, "t": 452.25201, "r": 520.96399, "b": 450.43158, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 6]}], "orig": "action", "text": "action"}, {"self_ref": "#/texts/306", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 534.55847, "t": 454.11719, "r": 548.58453, "b": 452.2967499999999, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 20]}], "orig": "Shows an action that", "text": "Shows an action that"}, {"self_ref": "#/texts/307", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 534.55847, "t": 452.42959999999994, "r": 545.08862, "b": 450.60916, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 16]}], "orig": "is not permitted", "text": "is not permitted"}, {"self_ref": "#/texts/308", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 483.05853, "t": 435.82584, "r": 494.72577, "b": 434.0054, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 16]}], "orig": "Warns of hazards", "text": "Warns of hazards"}, {"self_ref": "#/texts/309", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 483.05853, "t": 434.13821, "r": 487.07525999999996, "b": 432.31778, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 5]}], "orig": "ahead", "text": "ahead"}, {"self_ref": "#/texts/310", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 499.39645, "t": 435.73702999999995, "r": 504.69171, "b": 433.9166, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 8]}], "orig": "Warns of", "text": "Warns of"}, {"self_ref": "#/texts/311", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 499.39645, "t": 434.04944, "r": 511.69116, "b": 432.22900000000004, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 18]}], "orig": "construction zones", "text": "construction zones"}, {"self_ref": "#/texts/312", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 516.75891, "t": 435.73702999999995, "r": 527.42938, "b": 433.9166, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 16]}], "orig": "Railway crossing", "text": "Railway crossing"}, {"self_ref": "#/texts/313", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 534.5141, "t": 439.07019, "r": 547.89862, "b": 437.24976, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 18]}], "orig": "Shows distance and", "text": "Shows distance and"}, {"self_ref": "#/texts/314", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 534.5141, "t": 437.3826, "r": 540.2818, "b": 435.56216, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 9]}], "orig": "direction", "text": "direction"}, {"self_ref": "#/texts/315", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 478.37466, "t": 521.85925, "r": 479.14251999999993, "b": 519.82123, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "\u2022", "text": "\u2022"}, {"self_ref": "#/texts/316", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.91036999999994, "t": 521.85925, "r": 483.74963, "b": 519.82123, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 5]}], "orig": "Signs", "text": "Signs"}, {"self_ref": "#/texts/317", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.97293, "t": 519.15283, "r": 492.31219, "b": 517.65112, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 18]}], "orig": "- regulatory signs", "text": "- regulatory signs"}, {"self_ref": "#/texts/318", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.97293, "t": 516.85486, "r": 486.72598000000005, "b": 515.35321, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 9]}], "orig": "- school,", "text": "- school,"}, {"self_ref": "#/texts/319", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 481.21602999999993, "t": 515.22028, "r": 492.93286000000006, "b": 513.18231, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 14]}], "orig": "playground and", "text": "playground and"}, {"self_ref": "#/texts/320", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 481.21602999999993, "t": 513.17725, "r": 491.82938000000007, "b": 511.13925, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 15]}], "orig": "crosswalk signs", "text": "crosswalk signs"}, {"self_ref": "#/texts/321", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.97293, "t": 510.47241, "r": 491.00775000000004, "b": 508.97076, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 16]}], "orig": "- lane use signs", "text": "- lane use signs"}, {"self_ref": "#/texts/322", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.97293, "t": 508.17444, "r": 493.32748, "b": 506.6727900000001, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 20]}], "orig": "- turn control signs", "text": "- turn control signs"}, {"self_ref": "#/texts/323", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.97293, "t": 505.8765, "r": 490.4915199999999, "b": 504.37482, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 15]}], "orig": "- parking signs", "text": "- parking signs"}, {"self_ref": "#/texts/324", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.97293, "t": 503.57852, "r": 491.17004000000003, "b": 502.07684, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 15]}], "orig": "- reserved lane", "text": "- reserved lane"}, {"self_ref": "#/texts/325", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 481.21602999999993, "t": 501.94394000000005, "r": 484.77405000000005, "b": 499.90594, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 5]}], "orig": "signs", "text": "signs"}, {"self_ref": "#/texts/326", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.97293, "t": 499.23830999999996, "r": 490.83398, "b": 497.73666, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 15]}], "orig": "- warning signs", "text": "- warning signs"}, {"self_ref": "#/texts/327", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.97293, "t": 496.94037, "r": 491.62692, "b": 495.43869, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 16]}], "orig": "- object markers", "text": "- object markers"}, {"self_ref": "#/texts/328", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.97293, "t": 494.6424, "r": 490.37341, "b": 493.1407500000001, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 14]}], "orig": "- construction", "text": "- construction"}, {"self_ref": "#/texts/329", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 481.21602999999993, "t": 493.00781, "r": 484.77405000000005, "b": 490.96985, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 5]}], "orig": "signs", "text": "signs"}, {"self_ref": "#/texts/330", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.97293, "t": 490.30219000000005, "r": 492.93912, "b": 488.80054, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 17]}], "orig": "- information and", "text": "- information and"}, {"self_ref": "#/texts/331", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 481.21602999999993, "t": 488.6676, "r": 493.00525, "b": 486.62964, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 17]}], "orig": "destination signs", "text": "destination signs"}, {"self_ref": "#/texts/332", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.97293, "t": 485.9620100000001, "r": 489.99047999999993, "b": 484.46033, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 15]}], "orig": "- railway signs", "text": "- railway signs"}, {"self_ref": "#/texts/333", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 478.375, "t": 483.75211, "r": 479.1032400000001, "b": 481.71414, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "\u2022", "text": "\u2022"}, {"self_ref": "#/texts/334", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.83151, "t": 483.75211, "r": 484.92925999999994, "b": 481.71414, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 7]}], "orig": "Signals", "text": "Signals"}, {"self_ref": "#/texts/335", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.97293, "t": 481.04642, "r": 490.00091999999995, "b": 479.54474, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 14]}], "orig": "- lane control", "text": "- lane control"}, {"self_ref": "#/texts/336", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 481.21602999999993, "t": 479.4118000000001, "r": 485.95331, "b": 477.37384, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 7]}], "orig": "signals", "text": "signals"}, {"self_ref": "#/texts/337", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.97293, "t": 476.70621, "r": 489.29876999999993, "b": 475.20456, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 16]}], "orig": "- traffic lights", "text": "- traffic lights"}, {"self_ref": "#/texts/338", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 478.375, "t": 474.49634, "r": 479.18129999999996, "b": 472.4583400000001, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "\u2022", "text": "\u2022"}, {"self_ref": "#/texts/339", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.98761, "t": 474.49634, "r": 490.46960000000007, "b": 472.4583400000001, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 13]}], "orig": "Road markings", "text": "Road markings"}, {"self_ref": "#/texts/340", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.97293, "t": 471.79062, "r": 489.26166000000006, "b": 470.28897, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 14]}], "orig": "- yellow lines", "text": "- yellow lines"}, {"self_ref": "#/texts/341", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.97293, "t": 469.49268, "r": 488.59189, "b": 467.991, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 13]}], "orig": "- white lines", "text": "- white lines"}, {"self_ref": "#/texts/342", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.97293, "t": 467.1947, "r": 491.17004000000003, "b": 465.69302, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 15]}], "orig": "- reserved lane", "text": "- reserved lane"}, {"self_ref": "#/texts/343", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 481.21602999999993, "t": 465.56012, "r": 487.58978, "b": 463.52216, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 8]}], "orig": "markings", "text": "markings"}, {"self_ref": "#/texts/344", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 479.97293, "t": 462.85449, "r": 491.75177, "b": 461.35284, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 16]}], "orig": "- other markings", "text": "- other markings"}, {"self_ref": "#/texts/345", "parent": {"cref": "#/pictures/0"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 478.15246999999994, "t": 526.92969, "r": 493.75586, "b": 523.93127, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 15]}], "orig": "in this chapter", "text": "in this chapter"}, {"self_ref": "#/texts/346", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 1, "bbox": {"l": 317.9549865722656, "t": 199.53408813476562, "r": 379.82049560546875, "b": 189.22499084472656, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 8]}], "orig": "KEYWORDS", "text": "KEYWORDS", "level": 1}, {"self_ref": "#/texts/347", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 317.9549865722656, "t": 184.3324432373047, "r": 559.1859741210938, "b": 164.9988250732422, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 90]}], "orig": "PDF document conversion, layout segmentation, object-detection, data set, Machine Learning", "text": "PDF document conversion, layout segmentation, object-detection, data set, Machine Learning"}, {"self_ref": "#/texts/348", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 1, "bbox": {"l": 317.65997314453125, "t": 151.94566345214844, "r": 404.6536560058594, "b": 144.41390991210938, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 21]}], "orig": "ACM Reference Format:", "text": "ACM Reference Format:", "level": 1}, {"self_ref": "#/texts/349", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 317.9549865722656, "t": 141.88003540039062, "r": 559.5494995117188, "b": 84.62297058105469, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 374]}], "orig": "Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar. 2022. DocLayNet: A Large Human-Annotated Dataset for DocumentLayout Analysis. In Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD '22), August 14-18, 2022, Washington, DC, USA. ACM, New York, NY, USA, 9 pages. https://doi.org/10.1145/ 3534678.3539043", "text": "Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar. 2022. DocLayNet: A Large Human-Annotated Dataset for DocumentLayout Analysis. In Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD '22), August 14-18, 2022, Washington, DC, USA. ACM, New York, NY, USA, 9 pages. https://doi.org/10.1145/ 3534678.3539043"}, {"self_ref": "#/texts/350", "parent": {"cref": "#/body"}, "children": [], "label": "page_header", "prov": [{"page_no": 2, "bbox": {"l": 53.79800033569336, "t": 731.6909790039062, "r": 558.202880859375, "b": 723.4239501953125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 130]}], "orig": "KDD \u201922, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", "text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar"}, {"self_ref": "#/texts/351", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 2, "bbox": {"l": 53.79800033569336, "t": 706.14013671875, "r": 156.52899169921875, "b": 695.8309936523438, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 14]}], "orig": "1 INTRODUCTION", "text": "1 INTRODUCTION", "level": 1}, {"self_ref": "#/texts/352", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 2, "bbox": {"l": 53.52899932861328, "t": 681.0164794921875, "r": 303.0169677734375, "b": 563.0528564453125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 702]}], "orig": "Despite the substantial improvements achieved with machine-learning (ML) approaches and deep neural networks in recent years, document conversion remains a challenging problem, as demonstrated by the numerous public competitions held on this topic [1-4]. The challenge originates from the huge variability in PDF documents regarding layout, language and formats (scanned, programmatic or a combination of both). Engineering a single ML model that can be applied on all types of documents and provides high-quality layout segmentation remains to this day extremely challenging [5]. To highlight the variability in document layouts, we show a few example documents from the DocLayNet dataset in Figure 1.", "text": "Despite the substantial improvements achieved with machine-learning (ML) approaches and deep neural networks in recent years, document conversion remains a challenging problem, as demonstrated by the numerous public competitions held on this topic [1-4]. The challenge originates from the huge variability in PDF documents regarding layout, language and formats (scanned, programmatic or a combination of both). Engineering a single ML model that can be applied on all types of documents and provides high-quality layout segmentation remains to this day extremely challenging [5]. To highlight the variability in document layouts, we show a few example documents from the DocLayNet dataset in Figure 1."}, {"self_ref": "#/texts/353", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 2, "bbox": {"l": 53.52899932861328, "t": 560.4684448242188, "r": 295.5641174316406, "b": 289.0808410644531, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1580]}], "orig": "A key problem in the process of document conversion is to understand the structure of a single document page, i.e. which segments of text should be grouped together in a unit. To train models for this task, there are currently two large datasets available to the community, PubLayNet [6] and DocBank [7]. They were introduced in 2019 and 2020 respectively and significantly accelerated the implementation of layout detection and segmentation models due to their sizes of 300K and 500K ground-truth pages. These sizes were achieved by leveraging an automation approach. The benefit of automated ground-truth generation is obvious: one can generate large ground-truth datasets at virtually no cost. However, the automation introduces a constraint on the variability in the dataset, because corresponding structured source data must be available. PubLayNet and DocBank were both generated from scientific document repositories (PubMed and arXiv), which provide XML or L A T E X sources. Those scientific documents present a limited variability in their layouts, because they are typeset in uniform templates provided by the publishers. Obviously, documents such as technical manuals, annual company reports, legal text, government tenders, etc. have very different and partially unique layouts. As a consequence, the layout predictions obtained from models trained on PubLayNet or DocBank is very reasonable when applied on scientific documents. However, for more artistic or free-style layouts, we see sub-par prediction quality from these models, which we demonstrate in Section 5.", "text": "A key problem in the process of document conversion is to understand the structure of a single document page, i.e. which segments of text should be grouped together in a unit. To train models for this task, there are currently two large datasets available to the community, PubLayNet [6] and DocBank [7]. They were introduced in 2019 and 2020 respectively and significantly accelerated the implementation of layout detection and segmentation models due to their sizes of 300K and 500K ground-truth pages. These sizes were achieved by leveraging an automation approach. The benefit of automated ground-truth generation is obvious: one can generate large ground-truth datasets at virtually no cost. However, the automation introduces a constraint on the variability in the dataset, because corresponding structured source data must be available. PubLayNet and DocBank were both generated from scientific document repositories (PubMed and arXiv), which provide XML or L A T E X sources. Those scientific documents present a limited variability in their layouts, because they are typeset in uniform templates provided by the publishers. Obviously, documents such as technical manuals, annual company reports, legal text, government tenders, etc. have very different and partially unique layouts. As a consequence, the layout predictions obtained from models trained on PubLayNet or DocBank is very reasonable when applied on scientific documents. However, for more artistic or free-style layouts, we see sub-par prediction quality from these models, which we demonstrate in Section 5."}, {"self_ref": "#/texts/354", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 2, "bbox": {"l": 53.59199905395508, "t": 286.4964599609375, "r": 295.56396484375, "b": 212.36782836914062, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 462]}], "orig": "In this paper, we present the DocLayNet dataset. It provides pageby-page layout annotation ground-truth using bounding-boxes for 11 distinct class labels on 80863 unique document pages, of which a fraction carry double- or triple-annotations. DocLayNet is similar in spirit to PubLayNet and DocBank and will likewise be made available to the public 1 in order to stimulate the document-layout analysis community. It distinguishes itself in the following aspects:", "text": "In this paper, we present the DocLayNet dataset. It provides pageby-page layout annotation ground-truth using bounding-boxes for 11 distinct class labels on 80863 unique document pages, of which a fraction carry double- or triple-annotations. DocLayNet is similar in spirit to PubLayNet and DocBank and will likewise be made available to the public 1 in order to stimulate the document-layout analysis community. It distinguishes itself in the following aspects:"}, {"self_ref": "#/texts/355", "parent": {"cref": "#/groups/0"}, "children": [], "label": "list_item", "prov": [{"page_no": 2, "bbox": {"l": 64.70800018310547, "t": 207.41844177246094, "r": 295.5616455078125, "b": 177.12582397460938, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 149]}], "orig": "(1) Human Annotation : In contrast to PubLayNet and DocBank, we relied on human annotation instead of automation approaches to generate the data set.", "text": "(1) Human Annotation : In contrast to PubLayNet and DocBank, we relied on human annotation instead of automation approaches to generate the data set.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/356", "parent": {"cref": "#/groups/0"}, "children": [], "label": "list_item", "prov": [{"page_no": 2, "bbox": {"l": 64.70800018310547, "t": 174.54144287109375, "r": 294.2625427246094, "b": 155.20883178710938, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 109]}], "orig": "(2) Large Layout Variability : We include diverse and complex layouts from a large variety of public sources.", "text": "(2) Large Layout Variability : We include diverse and complex layouts from a large variety of public sources.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/357", "parent": {"cref": "#/groups/0"}, "children": [], "label": "list_item", "prov": [{"page_no": 2, "bbox": {"l": 64.70800018310547, "t": 152.62445068359375, "r": 294.6838073730469, "b": 122.33183288574219, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 180]}], "orig": "(3) Detailed Label Set : We define 11 class labels to distinguish layout features in high detail. PubLayNet provides 5 labels; DocBank provides 13, although not a superset of ours.", "text": "(3) Detailed Label Set : We define 11 class labels to distinguish layout features in high detail. PubLayNet provides 5 labels; DocBank provides 13, although not a superset of ours.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/358", "parent": {"cref": "#/groups/0"}, "children": [], "label": "list_item", "prov": [{"page_no": 2, "bbox": {"l": 64.70800018310547, "t": 119.7474365234375, "r": 295.56439208984375, "b": 100.41383361816406, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 115]}], "orig": "(4) Redundant Annotations : A fraction of the pages in the DocLayNet data set carry more than one human annotation.", "text": "(4) Redundant Annotations : A fraction of the pages in the DocLayNet data set carry more than one human annotation.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/359", "parent": {"cref": "#/body"}, "children": [], "label": "footnote", "prov": [{"page_no": 2, "bbox": {"l": 53.672000885009766, "t": 89.77363586425781, "r": 216.02749633789062, "b": 83.2601089477539, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 60]}], "orig": "$^{1}$https://developer.ibm.com/exchanges/data/all/doclaynet", "text": "$^{1}$https://developer.ibm.com/exchanges/data/all/doclaynet"}, {"self_ref": "#/texts/360", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 2, "bbox": {"l": 342.0950012207031, "t": 704.636474609375, "r": 558.4320068359375, "b": 685.3028564453125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 86]}], "orig": "This enables experimentation with annotation uncertainty and quality control analysis.", "text": "This enables experimentation with annotation uncertainty and quality control analysis."}, {"self_ref": "#/texts/361", "parent": {"cref": "#/groups/1"}, "children": [], "label": "list_item", "prov": [{"page_no": 2, "bbox": {"l": 328.8650207519531, "t": 682.718505859375, "r": 559.7210083007812, "b": 630.5088500976562, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 280]}], "orig": "(5) Pre-defined Train-, Test- & Validation-set : Like DocBank, we provide fixed train-, test- & validation-sets to ensure proportional representation of the class-labels. Further, we prevent leakage of unique layouts across sets, which has a large effect on model accuracy scores.", "text": "(5) Pre-defined Train-, Test- & Validation-set : Like DocBank, we provide fixed train-, test- & validation-sets to ensure proportional representation of the class-labels. Further, we prevent leakage of unique layouts across sets, which has a large effect on model accuracy scores.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/362", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 2, "bbox": {"l": 317.62298583984375, "t": 624.0244750976562, "r": 559.1903076171875, "b": 571.8138427734375, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 297]}], "orig": "All aspects outlined above are detailed in Section 3. In Section 4, we will elaborate on how we designed and executed this large-scale human annotation campaign. We will also share key insights and lessons learned that might prove helpful for other parties planning to set up annotation campaigns.", "text": "All aspects outlined above are detailed in Section 3. In Section 4, we will elaborate on how we designed and executed this large-scale human annotation campaign. We will also share key insights and lessons learned that might prove helpful for other parties planning to set up annotation campaigns."}, {"self_ref": "#/texts/363", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 2, "bbox": {"l": 317.7309875488281, "t": 569.2294311523438, "r": 559.5819702148438, "b": 484.142822265625, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 506]}], "orig": "In Section 5, we will present baseline accuracy numbers for a variety of object detection methods (Faster R-CNN, Mask R-CNN and YOLOv5) trained on DocLayNet. We further show how the model performance is impacted by varying the DocLayNet dataset size, reducing the label set and modifying the train/test-split. Last but not least, we compare the performance of models trained on PubLayNet, DocBank and DocLayNet and demonstrate that a model trained on DocLayNet provides overall more robust layout recovery.", "text": "In Section 5, we will present baseline accuracy numbers for a variety of object detection methods (Faster R-CNN, Mask R-CNN and YOLOv5) trained on DocLayNet. We further show how the model performance is impacted by varying the DocLayNet dataset size, reducing the label set and modifying the train/test-split. Last but not least, we compare the performance of models trained on PubLayNet, DocBank and DocLayNet and demonstrate that a model trained on DocLayNet provides overall more robust layout recovery."}, {"self_ref": "#/texts/364", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 2, "bbox": {"l": 317.9549865722656, "t": 470.7911071777344, "r": 421.7441101074219, "b": 460.4820251464844, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 14]}], "orig": "2 RELATED WORK", "text": "2 RELATED WORK", "level": 1}, {"self_ref": "#/texts/365", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 2, "bbox": {"l": 317.5249938964844, "t": 445.6674499511719, "r": 559.7161254882812, "b": 327.7038269042969, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 655]}], "orig": "While early approaches in document-layout analysis used rulebased algorithms and heuristics [8], the problem is lately addressed with deep learning methods. The most common approach is to leverage object detection models [9-15]. In the last decade, the accuracy and speed of these models has increased dramatically. Furthermore, most state-of-the-art object detection methods can be trained and applied with very little work, thanks to a standardisation effort of the ground-truth data format [16] and common deep-learning frameworks [17]. Reference data sets such as PubLayNet [6] and DocBank provide their data in the commonly accepted COCO format [16].", "text": "While early approaches in document-layout analysis used rulebased algorithms and heuristics [8], the problem is lately addressed with deep learning methods. The most common approach is to leverage object detection models [9-15]. In the last decade, the accuracy and speed of these models has increased dramatically. Furthermore, most state-of-the-art object detection methods can be trained and applied with very little work, thanks to a standardisation effort of the ground-truth data format [16] and common deep-learning frameworks [17]. Reference data sets such as PubLayNet [6] and DocBank provide their data in the commonly accepted COCO format [16]."}, {"self_ref": "#/texts/366", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 2, "bbox": {"l": 317.9549865722656, "t": 325.1194763183594, "r": 559.1864624023438, "b": 240.03182983398438, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 500]}], "orig": "Lately, new types of ML models for document-layout analysis have emerged in the community [18-21]. These models do not approach the problem of layout analysis purely based on an image representation of the page, as computer vision methods do. Instead, they combine the text tokens and image representation of a page in order to obtain a segmentation. While the reported accuracies appear to be promising, a broadly accepted data format which links geometric and textual features has yet to establish.", "text": "Lately, new types of ML models for document-layout analysis have emerged in the community [18-21]. These models do not approach the problem of layout analysis purely based on an image representation of the page, as computer vision methods do. Instead, they combine the text tokens and image representation of a page in order to obtain a segmentation. While the reported accuracies appear to be promising, a broadly accepted data format which links geometric and textual features has yet to establish."}, {"self_ref": "#/texts/367", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 2, "bbox": {"l": 317.9549865722656, "t": 226.6800994873047, "r": 477.4568786621094, "b": 216.37100219726562, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 23]}], "orig": "3 THE DOCLAYNET DATASET", "text": "3 THE DOCLAYNET DATASET", "level": 1}, {"self_ref": "#/texts/368", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 2, "bbox": {"l": 317.9549865722656, "t": 201.5564422607422, "r": 559.7131958007812, "b": 116.46983337402344, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 522]}], "orig": "DocLayNet contains 80863 PDF pages. Among these, 7059 carry two instances of human annotations, and 1591 carry three. This amounts to 91104 total annotation instances. The annotations provide layout information in the shape of labeled, rectangular boundingboxes. We define 11 distinct labels for layout features, namely Caption , Footnote , Formula , List-item , Page-footer , Page-header , Picture , Section-header , Table , Text , and Title . Our reasoning for picking this particular label set is detailed in Section 4.", "text": "DocLayNet contains 80863 PDF pages. Among these, 7059 carry two instances of human annotations, and 1591 carry three. This amounts to 91104 total annotation instances. The annotations provide layout information in the shape of labeled, rectangular boundingboxes. We define 11 distinct labels for layout features, namely Caption , Footnote , Formula , List-item , Page-footer , Page-header , Picture , Section-header , Table , Text , and Title . Our reasoning for picking this particular label set is detailed in Section 4."}, {"self_ref": "#/texts/369", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 2, "bbox": {"l": 317.9549865722656, "t": 113.88543701171875, "r": 558.2041015625, "b": 83.59282684326172, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 186]}], "orig": "In addition to open intellectual property constraints for the source documents, we required that the documents in DocLayNet adhere to a few conditions. Firstly, we kept scanned documents", "text": "In addition to open intellectual property constraints for the source documents, we required that the documents in DocLayNet adhere to a few conditions. Firstly, we kept scanned documents"}, {"self_ref": "#/texts/370", "parent": {"cref": "#/body"}, "children": [], "label": "page_header", "prov": [{"page_no": 3, "bbox": {"l": 53.79800033569336, "t": 731.6909790039062, "r": 347.0172424316406, "b": 723.4239501953125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 71]}], "orig": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis", "text": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis"}, {"self_ref": "#/texts/371", "parent": {"cref": "#/body"}, "children": [], "label": "page_header", "prov": [{"page_no": 3, "bbox": {"l": 365.75701904296875, "t": 731.6909790039062, "r": 558.2028198242188, "b": 723.4239501953125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 48]}], "orig": "KDD \u201922, August 14-18, 2022, Washington, DC, USA", "text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA"}, {"self_ref": "#/texts/372", "parent": {"cref": "#/body"}, "children": [], "label": "caption", "prov": [{"page_no": 3, "bbox": {"l": 53.79800033569336, "t": 555.885009765625, "r": 294.0437316894531, "b": 536.4527587890625, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 69]}], "orig": "Figure 2: Distribution of DocLayNet pages across document categories.", "text": "Figure 2: Distribution of DocLayNet pages across document categories."}, {"self_ref": "#/texts/373", "parent": {"cref": "#/pictures/1"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 237.11293, "t": 658.91284, "r": 262.97623, "b": 650.3858, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 7]}], "orig": "Patents", "text": "Patents"}, {"self_ref": "#/texts/374", "parent": {"cref": "#/pictures/1"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 202.87892, "t": 651.53821, "r": 213.89999, "b": 643.01117, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "8%", "text": "8%"}, {"self_ref": "#/texts/375", "parent": {"cref": "#/pictures/1"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 207.13306, "t": 698.8423499999999, "r": 237.64882999999998, "b": 690.31531, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 10]}], "orig": "Scientific", "text": "Scientific"}, {"self_ref": "#/texts/376", "parent": {"cref": "#/pictures/1"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 184.40349, "t": 673.31793, "r": 199.66519, "b": 664.79089, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 3]}], "orig": "17%", "text": "17%"}, {"self_ref": "#/texts/377", "parent": {"cref": "#/pictures/1"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 88.288223, "t": 677.6452600000001, "r": 118.80401, "b": 669.1182300000002, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 9]}], "orig": "Financial", "text": "Financial"}, {"self_ref": "#/texts/378", "parent": {"cref": "#/pictures/1"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 136.24422, "t": 661.75592, "r": 151.50592, "b": 653.22888, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 3]}], "orig": "32%", "text": "32%"}, {"self_ref": "#/texts/379", "parent": {"cref": "#/pictures/1"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 93.973373, "t": 604.34235, "r": 121.11515, "b": 595.81531, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 7]}], "orig": "Tenders", "text": "Tenders"}, {"self_ref": "#/texts/380", "parent": {"cref": "#/pictures/1"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 139.6235, "t": 621.77252, "r": 150.64458, "b": 613.24548, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "6%", "text": "6%"}, {"self_ref": "#/texts/381", "parent": {"cref": "#/pictures/1"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 139.88339, "t": 579.49963, "r": 157.68491, "b": 570.9726, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 4]}], "orig": "Laws", "text": "Laws"}, {"self_ref": "#/texts/382", "parent": {"cref": "#/pictures/1"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 157.43983, "t": 608.22192, "r": 172.70154, "b": 599.69489, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 3]}], "orig": "16%", "text": "16%"}, {"self_ref": "#/texts/383", "parent": {"cref": "#/pictures/1"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 225.47252, "t": 602.70343, "r": 254.29510000000002, "b": 594.17639, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 7]}], "orig": "Manuals", "text": "Manuals"}, {"self_ref": "#/texts/384", "parent": {"cref": "#/pictures/1"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 194.40683, "t": 620.87854, "r": 209.66853, "b": 612.3515, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 3]}], "orig": "21%", "text": "21%"}, {"self_ref": "#/texts/385", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 53.79800033569336, "t": 510.19647216796875, "r": 294.2738342285156, "b": 425.1098327636719, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 513]}], "orig": "to a minimum, since they introduce difficulties in annotation (see Section 4). As a second condition, we focussed on medium to large documents ( > 10 pages) with technical content, dense in complex tables, figures, plots and captions. Such documents carry a lot of information value, but are often hard to analyse with high accuracy due to their challenging layouts. Counterexamples of documents not included in the dataset are receipts, invoices, hand-written documents or photographs showing \"text in the wild\".", "text": "to a minimum, since they introduce difficulties in annotation (see Section 4). As a second condition, we focussed on medium to large documents ( > 10 pages) with technical content, dense in complex tables, figures, plots and captions. Such documents carry a lot of information value, but are often hard to analyse with high accuracy due to their challenging layouts. Counterexamples of documents not included in the dataset are receipts, invoices, hand-written documents or photographs showing \"text in the wild\"."}, {"self_ref": "#/texts/386", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 53.57400131225586, "t": 422.52545166015625, "r": 295.5604553222656, "b": 282.6438293457031, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 810]}], "orig": "The pages in DocLayNet can be grouped into six distinct categories, namely Financial Reports , Manuals , Scientific Articles , Laws & Regulations , Patents and Government Tenders . Each document category was sourced from various repositories. For example, Financial Reports contain both free-style format annual reports 2 which expose company-specific, artistic layouts as well as the more formal SEC filings. The two largest categories ( Financial Reports and Manuals ) contain a large amount of free-style layouts in order to obtain maximum variability. In the other four categories, we boosted the variability by mixing documents from independent providers, such as different government websites or publishers. In Figure 2, we show the document categories contained in DocLayNet with their respective sizes.", "text": "The pages in DocLayNet can be grouped into six distinct categories, namely Financial Reports , Manuals , Scientific Articles , Laws & Regulations , Patents and Government Tenders . Each document category was sourced from various repositories. For example, Financial Reports contain both free-style format annual reports 2 which expose company-specific, artistic layouts as well as the more formal SEC filings. The two largest categories ( Financial Reports and Manuals ) contain a large amount of free-style layouts in order to obtain maximum variability. In the other four categories, we boosted the variability by mixing documents from independent providers, such as different government websites or publishers. In Figure 2, we show the document categories contained in DocLayNet with their respective sizes."}, {"self_ref": "#/texts/387", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 53.46699905395508, "t": 280.0594482421875, "r": 295.5615539550781, "b": 184.01382446289062, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 535]}], "orig": "We did not control the document selection with regard to language. The vast majority of documents contained in DocLayNet (close to 95%) are published in English language. However, DocLayNet also contains a number of documents in other languages such as German (2.5%), French (1.0%) and Japanese (1.0%). While the document language has negligible impact on the performance of computer vision methods such as object detection and segmentation models, it might prove challenging for layout analysis methods which exploit textual features.", "text": "We did not control the document selection with regard to language. The vast majority of documents contained in DocLayNet (close to 95%) are published in English language. However, DocLayNet also contains a number of documents in other languages such as German (2.5%), French (1.0%) and Japanese (1.0%). While the document language has negligible impact on the performance of computer vision methods such as object detection and segmentation models, it might prove challenging for layout analysis methods which exploit textual features."}, {"self_ref": "#/texts/388", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 53.79800033569336, "t": 181.429443359375, "r": 295.56396484375, "b": 107.30182647705078, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 413]}], "orig": "To ensure that future benchmarks in the document-layout analysis community can be easily compared, we have split up DocLayNet into pre-defined train-, test- and validation-sets. In this way, we can avoid spurious variations in the evaluation scores due to random splitting in train-, test- and validation-sets. We also ensured that less frequent labels are represented in train and test sets in equal proportions.", "text": "To ensure that future benchmarks in the document-layout analysis community can be easily compared, we have split up DocLayNet into pre-defined train-, test- and validation-sets. In this way, we can avoid spurious variations in the evaluation scores due to random splitting in train-, test- and validation-sets. We also ensured that less frequent labels are represented in train and test sets in equal proportions."}, {"self_ref": "#/texts/389", "parent": {"cref": "#/body"}, "children": [], "label": "footnote", "prov": [{"page_no": 3, "bbox": {"l": 53.79800033569336, "t": 90.34363555908203, "r": 195.78997802734375, "b": 83.83010864257812, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 51]}], "orig": "$^{2}$e.g. AAPL from https://www.annualreports.com/", "text": "$^{2}$e.g. AAPL from https://www.annualreports.com/"}, {"self_ref": "#/texts/390", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 317.62298583984375, "t": 704.636474609375, "r": 559.1918334960938, "b": 630.5088500976562, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 435]}], "orig": "Table 1 shows the overall frequency and distribution of the labels among the different sets. Importantly, we ensure that subsets are only split on full-document boundaries. This avoids that pages of the same document are spread over train, test and validation set, which can give an undesired evaluation advantage to models and lead to overestimation of their prediction accuracy. We will show the impact of this decision in Section 5.", "text": "Table 1 shows the overall frequency and distribution of the labels among the different sets. Importantly, we ensure that subsets are only split on full-document boundaries. This avoids that pages of the same document are spread over train, test and validation set, which can give an undesired evaluation advantage to models and lead to overestimation of their prediction accuracy. We will show the impact of this decision in Section 5."}, {"self_ref": "#/texts/391", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 317.9549865722656, "t": 627.9244384765625, "r": 558.4381103515625, "b": 520.9197998046875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 645]}], "orig": "In order to accommodate the different types of models currently in use by the community, we provide DocLayNet in an augmented COCO format [16]. This entails the standard COCO ground-truth file (in JSON format) with the associated page images (in PNG format, 1025 \u00d7 1025 pixels). Furthermore, custom fields have been added to each COCO record to specify document category, original document filename and page number. In addition, we also provide the original PDF pages, as well as sidecar files containing parsed PDF text and text-cell coordinates (in JSON). All additional files are linked to the primary page images by their matching filenames.", "text": "In order to accommodate the different types of models currently in use by the community, we provide DocLayNet in an augmented COCO format [16]. This entails the standard COCO ground-truth file (in JSON format) with the associated page images (in PNG format, 1025 \u00d7 1025 pixels). Furthermore, custom fields have been added to each COCO record to specify document category, original document filename and page number. In addition, we also provide the original PDF pages, as well as sidecar files containing parsed PDF text and text-cell coordinates (in JSON). All additional files are linked to the primary page images by their matching filenames."}, {"self_ref": "#/texts/392", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 316.9419860839844, "t": 518.33544921875, "r": 559.7215576171875, "b": 203.11082458496094, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1854]}], "orig": "Despite being cost-intense and far less scalable than automation, human annotation has several benefits over automated groundtruth generation. The first and most obvious reason to leverage human annotations is the freedom to annotate any type of document without requiring a programmatic source. For most PDF documents, the original source document is not available. The latter is not a hard constraint with human annotation, but it is for automated methods. A second reason to use human annotations is that the latter usually provide a more natural interpretation of the page layout. The human-interpreted layout can significantly deviate from the programmatic layout used in typesetting. For example, \"invisible\" tables might be used solely for aligning text paragraphs on columns. Such typesetting tricks might be interpreted by automated methods incorrectly as an actual table, while the human annotation will interpret it correctly as Text or other styles. The same applies to multi-line text elements, when authors decided to space them as \"invisible\" list elements without bullet symbols. A third reason to gather ground-truth through human annotation is to estimate a \"natural\" upper bound on the segmentation accuracy. As we will show in Section 4, certain documents featuring complex layouts can have different but equally acceptable layout interpretations. This natural upper bound for segmentation accuracy can be found by annotating the same pages multiple times by different people and evaluating the inter-annotator agreement. Such a baseline consistency evaluation is very useful to define expectations for a good target accuracy in trained deep neural network models and avoid overfitting (see Table 1). On the flip side, achieving high annotation consistency proved to be a key challenge in human annotation, as we outline in Section 4.", "text": "Despite being cost-intense and far less scalable than automation, human annotation has several benefits over automated groundtruth generation. The first and most obvious reason to leverage human annotations is the freedom to annotate any type of document without requiring a programmatic source. For most PDF documents, the original source document is not available. The latter is not a hard constraint with human annotation, but it is for automated methods. A second reason to use human annotations is that the latter usually provide a more natural interpretation of the page layout. The human-interpreted layout can significantly deviate from the programmatic layout used in typesetting. For example, \"invisible\" tables might be used solely for aligning text paragraphs on columns. Such typesetting tricks might be interpreted by automated methods incorrectly as an actual table, while the human annotation will interpret it correctly as Text or other styles. The same applies to multi-line text elements, when authors decided to space them as \"invisible\" list elements without bullet symbols. A third reason to gather ground-truth through human annotation is to estimate a \"natural\" upper bound on the segmentation accuracy. As we will show in Section 4, certain documents featuring complex layouts can have different but equally acceptable layout interpretations. This natural upper bound for segmentation accuracy can be found by annotating the same pages multiple times by different people and evaluating the inter-annotator agreement. Such a baseline consistency evaluation is very useful to define expectations for a good target accuracy in trained deep neural network models and avoid overfitting (see Table 1). On the flip side, achieving high annotation consistency proved to be a key challenge in human annotation, as we outline in Section 4."}, {"self_ref": "#/texts/393", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 3, "bbox": {"l": 317.9549865722656, "t": 185.15008544921875, "r": 470.2132568359375, "b": 174.8409881591797, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 21]}], "orig": "4 ANNOTATION CAMPAIGN", "text": "4 ANNOTATION CAMPAIGN", "level": 1}, {"self_ref": "#/texts/394", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 3, "bbox": {"l": 317.6860046386719, "t": 160.0264434814453, "r": 559.7138061523438, "b": 85.8978271484375, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 457]}], "orig": "The annotation campaign was carried out in four phases. In phase one, we identified and prepared the data sources for annotation. In phase two, we determined the class labels and how annotations should be done on the documents in order to obtain maximum consistency. The latter was guided by a detailed requirement analysis and exhaustive experiments. In phase three, we trained the annotation staff and performed exams for quality assurance. In phase four,", "text": "The annotation campaign was carried out in four phases. In phase one, we identified and prepared the data sources for annotation. In phase two, we determined the class labels and how annotations should be done on the documents in order to obtain maximum consistency. The latter was guided by a detailed requirement analysis and exhaustive experiments. In phase three, we trained the annotation staff and performed exams for quality assurance. In phase four,"}, {"self_ref": "#/texts/395", "parent": {"cref": "#/body"}, "children": [], "label": "page_header", "prov": [{"page_no": 4, "bbox": {"l": 53.79800033569336, "t": 731.6909790039062, "r": 558.202880859375, "b": 723.4239501953125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 130]}], "orig": "KDD \u201922, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", "text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar"}, {"self_ref": "#/texts/396", "parent": {"cref": "#/body"}, "children": [], "label": "caption", "prov": [{"page_no": 4, "bbox": {"l": 53.50199890136719, "t": 707.0450439453125, "r": 558.4896850585938, "b": 676.65380859375, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 348]}], "orig": "Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurrence (as % of row \"Total\") in the train, test and validation sets. The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the triple-annotated pages, from which we obtain accuracy ranges.", "text": "Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurrence (as % of row \"Total\") in the train, test and validation sets. The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the triple-annotated pages, from which we obtain accuracy ranges."}, {"self_ref": "#/texts/397", "parent": {"cref": "#/body"}, "children": [], "label": "caption", "prov": [{"page_no": 4, "bbox": {"l": 53.79800033569336, "t": 237.99000549316406, "r": 295.64874267578125, "b": 185.68075561523438, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 281]}], "orig": "Figure 3: Corpus Conversion Service annotation user interface. The PDF page is shown in the background, with overlaid text-cells (in darker shades). The annotation boxes can be drawn by dragging a rectangle over each segment with the respective label from the palette on the right.", "text": "Figure 3: Corpus Conversion Service annotation user interface. The PDF page is shown in the background, with overlaid text-cells (in darker shades). The annotation boxes can be drawn by dragging a rectangle over each segment with the respective label from the palette on the right."}, {"self_ref": "#/texts/398", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 4, "bbox": {"l": 53.46699905395508, "t": 157.7084503173828, "r": 294.0474548339844, "b": 116.45683288574219, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 231]}], "orig": "we distributed the annotation workload and performed continuous quality controls. Phase one and two required a small team of experts only. For phases three and four, a group of 40 dedicated annotators were assembled and supervised.", "text": "we distributed the annotation workload and performed continuous quality controls. Phase one and two required a small team of experts only. For phases three and four, a group of 40 dedicated annotators were assembled and supervised."}, {"self_ref": "#/texts/399", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 4, "bbox": {"l": 53.79800033569336, "t": 113.989013671875, "r": 295.5584411621094, "b": 83.57982635498047, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 193]}], "orig": "Phase 1: Data selection and preparation. Our inclusion criteria for documents were described in Section 3. A large effort went into ensuring that all documents are free to use. The data sources", "text": "Phase 1: Data selection and preparation. Our inclusion criteria for documents were described in Section 3. A large effort went into ensuring that all documents are free to use. The data sources"}, {"self_ref": "#/texts/400", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 4, "bbox": {"l": 317.9549865722656, "t": 479.92047119140625, "r": 559.1853637695312, "b": 416.7518310546875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 376]}], "orig": "include publication repositories such as arXiv$^{3}$, government offices, company websites as well as data directory services for financial reports and patents. Scanned documents were excluded wherever possible because they can be rotated or skewed. This would not allow us to perform annotation with rectangular bounding-boxes and therefore complicate the annotation process.", "text": "include publication repositories such as arXiv$^{3}$, government offices, company websites as well as data directory services for financial reports and patents. Scanned documents were excluded wherever possible because they can be rotated or skewed. This would not allow us to perform annotation with rectangular bounding-boxes and therefore complicate the annotation process."}, {"self_ref": "#/texts/401", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 4, "bbox": {"l": 317.9549865722656, "t": 414.1674499511719, "r": 559.7130737304688, "b": 285.2448425292969, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 746]}], "orig": "Preparation work included uploading and parsing the sourced PDF documents in the Corpus Conversion Service (CCS) [22], a cloud-native platform which provides a visual annotation interface and allows for dataset inspection and analysis. The annotation interface of CCS is shown in Figure 3. The desired balance of pages between the different document categories was achieved by selective subsampling of pages with certain desired properties. For example, we made sure to include the title page of each document and bias the remaining page selection to those with figures or tables. The latter was achieved by leveraging pre-trained object detection models from PubLayNet, which helped us estimate how many figures and tables a given page contains.", "text": "Preparation work included uploading and parsing the sourced PDF documents in the Corpus Conversion Service (CCS) [22], a cloud-native platform which provides a visual annotation interface and allows for dataset inspection and analysis. The annotation interface of CCS is shown in Figure 3. The desired balance of pages between the different document categories was achieved by selective subsampling of pages with certain desired properties. For example, we made sure to include the title page of each document and bias the remaining page selection to those with figures or tables. The latter was achieved by leveraging pre-trained object detection models from PubLayNet, which helped us estimate how many figures and tables a given page contains."}, {"self_ref": "#/texts/402", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 4, "bbox": {"l": 317.62298583984375, "t": 282.7770080566406, "r": 559.7176513671875, "b": 98.9438247680664, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1159]}], "orig": "Phase 2: Label selection and guideline. We reviewed the collected documents and identified the most common structural features they exhibit. This was achieved by identifying recurrent layout elements and lead us to the definition of 11 distinct class labels. These 11 class labels are Caption , Footnote , Formula , List-item , Pagefooter , Page-header , Picture , Section-header , Table , Text , and Title . Critical factors that were considered for the choice of these class labels were (1) the overall occurrence of the label, (2) the specificity of the label, (3) recognisability on a single page (i.e. no need for context from previous or next page) and (4) overall coverage of the page. Specificity ensures that the choice of label is not ambiguous, while coverage ensures that all meaningful items on a page can be annotated. We refrained from class labels that are very specific to a document category, such as Abstract in the Scientific Articles category. We also avoided class labels that are tightly linked to the semantics of the text. Labels such as Author and Affiliation , as seen in DocBank, are often only distinguishable by discriminating on", "text": "Phase 2: Label selection and guideline. We reviewed the collected documents and identified the most common structural features they exhibit. This was achieved by identifying recurrent layout elements and lead us to the definition of 11 distinct class labels. These 11 class labels are Caption , Footnote , Formula , List-item , Pagefooter , Page-header , Picture , Section-header , Table , Text , and Title . Critical factors that were considered for the choice of these class labels were (1) the overall occurrence of the label, (2) the specificity of the label, (3) recognisability on a single page (i.e. no need for context from previous or next page) and (4) overall coverage of the page. Specificity ensures that the choice of label is not ambiguous, while coverage ensures that all meaningful items on a page can be annotated. We refrained from class labels that are very specific to a document category, such as Abstract in the Scientific Articles category. We also avoided class labels that are tightly linked to the semantics of the text. Labels such as Author and Affiliation , as seen in DocBank, are often only distinguishable by discriminating on"}, {"self_ref": "#/texts/403", "parent": {"cref": "#/body"}, "children": [], "label": "footnote", "prov": [{"page_no": 4, "bbox": {"l": 317.9549865722656, "t": 89.64663696289062, "r": 369.2456970214844, "b": 83.13311004638672, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 24]}], "orig": "$^{3}$https://arxiv.org/", "text": "$^{3}$https://arxiv.org/"}, {"self_ref": "#/texts/404", "parent": {"cref": "#/body"}, "children": [], "label": "page_header", "prov": [{"page_no": 5, "bbox": {"l": 53.79800033569336, "t": 731.6909790039062, "r": 347.0172424316406, "b": 723.4239501953125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 71]}], "orig": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis", "text": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis"}, {"self_ref": "#/texts/405", "parent": {"cref": "#/body"}, "children": [], "label": "page_header", "prov": [{"page_no": 5, "bbox": {"l": 365.75701904296875, "t": 731.6909790039062, "r": 558.2028198242188, "b": 723.4239501953125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 48]}], "orig": "KDD \u201922, August 14-18, 2022, Washington, DC, USA", "text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA"}, {"self_ref": "#/texts/406", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 5, "bbox": {"l": 53.79800033569336, "t": 704.636474609375, "r": 294.04541015625, "b": 685.2938842773438, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 135]}], "orig": "the textual content of an element, which goes beyond visual layout recognition, in particular outside the Scientific Articles category.", "text": "the textual content of an element, which goes beyond visual layout recognition, in particular outside the Scientific Articles category."}, {"self_ref": "#/texts/407", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 5, "bbox": {"l": 53.79800033569336, "t": 682.7184448242188, "r": 295.5592346191406, "b": 542.8378295898438, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 812]}], "orig": "At first sight, the task of visual document-layout interpretation appears intuitive enough to obtain plausible annotations in most cases. However, during early trial-runs in the core team, we observed many cases in which annotators use different annotation styles, especially for documents with challenging layouts. For example, if a figure is presented with subfigures, one annotator might draw a single figure bounding-box, while another might annotate each subfigure separately. The same applies for lists, where one might annotate all list items in one block or each list item separately. In essence, we observed that challenging layouts would be annotated in different but plausible ways. To illustrate this, we show in Figure 4 multiple examples of plausible but inconsistent annotations on the same pages.", "text": "At first sight, the task of visual document-layout interpretation appears intuitive enough to obtain plausible annotations in most cases. However, during early trial-runs in the core team, we observed many cases in which annotators use different annotation styles, especially for documents with challenging layouts. For example, if a figure is presented with subfigures, one annotator might draw a single figure bounding-box, while another might annotate each subfigure separately. The same applies for lists, where one might annotate all list items in one block or each list item separately. In essence, we observed that challenging layouts would be annotated in different but plausible ways. To illustrate this, we show in Figure 4 multiple examples of plausible but inconsistent annotations on the same pages."}, {"self_ref": "#/texts/408", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 5, "bbox": {"l": 53.79800033569336, "t": 540.2534790039062, "r": 295.56005859375, "b": 455.16583251953125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 465]}], "orig": "Obviously, this inconsistency in annotations is not desirable for datasets which are intended to be used for model training. To minimise these inconsistencies, we created a detailed annotation guideline. While perfect consistency across 40 annotation staff members is clearly not possible to achieve, we saw a huge improvement in annotation consistency after the introduction of our annotation guideline. A few selected, non-trivial highlights of the guideline are:", "text": "Obviously, this inconsistency in annotations is not desirable for datasets which are intended to be used for model training. To minimise these inconsistencies, we created a detailed annotation guideline. While perfect consistency across 40 annotation staff members is clearly not possible to achieve, we saw a huge improvement in annotation consistency after the introduction of our annotation guideline. A few selected, non-trivial highlights of the guideline are:"}, {"self_ref": "#/texts/409", "parent": {"cref": "#/groups/2"}, "children": [], "label": "list_item", "prov": [{"page_no": 5, "bbox": {"l": 64.70800018310547, "t": 443.4874572753906, "r": 294.04620361328125, "b": 402.22686767578125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 202]}], "orig": "(1) Every list-item is an individual object instance with class label List-item . This definition is different from PubLayNet and DocBank, where all list-items are grouped together into one List object.", "text": "(1) Every list-item is an individual object instance with class label List-item . This definition is different from PubLayNet and DocBank, where all list-items are grouped together into one List object.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/410", "parent": {"cref": "#/groups/2"}, "children": [], "label": "list_item", "prov": [{"page_no": 5, "bbox": {"l": 64.70799255371094, "t": 399.6514892578125, "r": 295.563720703125, "b": 358.39984130859375, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 208]}], "orig": "(2) A List-item is a paragraph with hanging indentation. Singleline elements can qualify as List-item if the neighbour elements expose hanging indentation. Bullet or enumeration symbols are not a requirement.", "text": "(2) A List-item is a paragraph with hanging indentation. Singleline elements can qualify as List-item if the neighbour elements expose hanging indentation. Bullet or enumeration symbols are not a requirement.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/411", "parent": {"cref": "#/groups/2"}, "children": [], "label": "list_item", "prov": [{"page_no": 5, "bbox": {"l": 64.70800018310547, "t": 355.81549072265625, "r": 294.0472412109375, "b": 336.4728698730469, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 82]}], "orig": "(3) For every Caption , there must be exactly one corresponding Picture or Table .", "text": "(3) For every Caption , there must be exactly one corresponding Picture or Table .", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/412", "parent": {"cref": "#/groups/2"}, "children": [], "label": "list_item", "prov": [{"page_no": 5, "bbox": {"l": 64.70800018310547, "t": 333.8984680175781, "r": 294.0459899902344, "b": 314.5648193359375, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 70]}], "orig": "(4) Connected sub-pictures are grouped together in one Picture object.", "text": "(4) Connected sub-pictures are grouped together in one Picture object.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/413", "parent": {"cref": "#/groups/2"}, "children": [], "label": "list_item", "prov": [{"page_no": 5, "bbox": {"l": 64.70800018310547, "t": 311.98046875, "r": 264.5057067871094, "b": 303.59686279296875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 53]}], "orig": "(5) Formula numbers are included in a Formula object.", "text": "(5) Formula numbers are included in a Formula object.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/414", "parent": {"cref": "#/groups/2"}, "children": [], "label": "list_item", "prov": [{"page_no": 5, "bbox": {"l": 64.7080078125, "t": 301.021484375, "r": 294.0461730957031, "b": 270.72882080078125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 160]}], "orig": "(6) Emphasised text (e.g. in italic or bold) at the beginning of a paragraph is not considered a Section-header , unless it appears exclusively on its own line.", "text": "(6) Emphasised text (e.g. in italic or bold) at the beginning of a paragraph is not considered a Section-header , unless it appears exclusively on its own line.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/415", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 5, "bbox": {"l": 53.52899932861328, "t": 259.0494689941406, "r": 295.5625305175781, "b": 217.798828125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 221]}], "orig": "The complete annotation guideline is over 100 pages long and a detailed description is obviously out of scope for this paper. Nevertheless, it will be made publicly available alongside with DocLayNet for future reference.", "text": "The complete annotation guideline is over 100 pages long and a detailed description is obviously out of scope for this paper. Nevertheless, it will be made publicly available alongside with DocLayNet for future reference."}, {"self_ref": "#/texts/416", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 5, "bbox": {"l": 53.79800033569336, "t": 215.3310089111328, "r": 295.562255859375, "b": 86.29182434082031, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 792]}], "orig": "Phase 3: Training. After a first trial with a small group of people, we realised that providing the annotation guideline and a set of random practice pages did not yield the desired quality level for layout annotation. Therefore we prepared a subset of pages with two different complexity levels, each with a practice and an exam part. 974 pages were reference-annotated by one proficient core team member. Annotation staff were then given the task to annotate the same subsets (blinded from the reference). By comparing the annotations of each staff member with the reference annotations, we could quantify how closely their annotations matched the reference. Only after passing two exam levels with high annotation quality, staff were admitted into the production phase. Practice iterations", "text": "Phase 3: Training. After a first trial with a small group of people, we realised that providing the annotation guideline and a set of random practice pages did not yield the desired quality level for layout annotation. Therefore we prepared a subset of pages with two different complexity levels, each with a practice and an exam part. 974 pages were reference-annotated by one proficient core team member. Annotation staff were then given the task to annotate the same subsets (blinded from the reference). By comparing the annotations of each staff member with the reference annotations, we could quantify how closely their annotations matched the reference. Only after passing two exam levels with high annotation quality, staff were admitted into the production phase. Practice iterations"}, {"self_ref": "#/texts/417", "parent": {"cref": "#/body"}, "children": [], "label": "caption", "prov": [{"page_no": 5, "bbox": {"l": 317.9549865722656, "t": 318.5060119628906, "r": 559.8057861328125, "b": 288.11480712890625, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 173]}], "orig": "Figure 4: Examples of plausible annotation alternatives for the same page. Criteria in our annotation guideline can resolve cases A to C, while the case D remains ambiguous.", "text": "Figure 4: Examples of plausible annotation alternatives for the same page. Criteria in our annotation guideline can resolve cases A to C, while the case D remains ambiguous."}, {"self_ref": "#/texts/418", "parent": {"cref": "#/pictures/3"}, "children": [], "label": "text", "prov": [{"page_no": 5, "bbox": {"l": 340.00214, "t": 612.20703, "r": 416.20551, "b": 610.09027, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 64]}], "orig": "1ef23f5e6d7f10d393f9947e8208285dce9ae87250ac483ac4b4a59d51b4e037", "text": "1ef23f5e6d7f10d393f9947e8208285dce9ae87250ac483ac4b4a59d51b4e037"}, {"self_ref": "#/texts/419", "parent": {"cref": "#/pictures/3"}, "children": [], "label": "text", "prov": [{"page_no": 5, "bbox": {"l": 339.38269, "t": 706.80933, "r": 417.83722, "b": 699.716, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 25]}], "orig": "Compliant with guidelines", "text": "Compliant with guidelines"}, {"self_ref": "#/texts/420", "parent": {"cref": "#/pictures/3"}, "children": [], "label": "text", "prov": [{"page_no": 5, "bbox": {"l": 451.42834, "t": 706.80933, "r": 546.22913, "b": 699.716, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 33]}], "orig": "Plausible but invalid alternative", "text": "Plausible but invalid alternative"}, {"self_ref": "#/texts/421", "parent": {"cref": "#/pictures/3"}, "children": [], "label": "text", "prov": [{"page_no": 5, "bbox": {"l": 350.33701, "t": 427.14294, "r": 513.48035, "b": 420.04964999999993, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 53]}], "orig": "Borderline case: Two guideline-compliant alternatives", "text": "Borderline case: Two guideline-compliant alternatives"}, {"self_ref": "#/texts/422", "parent": {"cref": "#/pictures/3"}, "children": [], "label": "text", "prov": [{"page_no": 5, "bbox": {"l": 340.00201, "t": 546.92615, "r": 416.20538, "b": 544.80939, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 64]}], "orig": "03c31a2ee1ed1b583c28957f475ee545d144e1b5a264dc4dd068c8d2f6a64860", "text": "03c31a2ee1ed1b583c28957f475ee545d144e1b5a264dc4dd068c8d2f6a64860"}, {"self_ref": "#/texts/423", "parent": {"cref": "#/pictures/3"}, "children": [], "label": "text", "prov": [{"page_no": 5, "bbox": {"l": 340.00201, "t": 432.87512, "r": 416.20538, "b": 430.75833, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 64]}], "orig": "1a5cd524f1844c1260c8e8c073e1f442423c264583212b0d0b6626fc780e6ed4", "text": "1a5cd524f1844c1260c8e8c073e1f442423c264583212b0d0b6626fc780e6ed4"}, {"self_ref": "#/texts/424", "parent": {"cref": "#/pictures/3"}, "children": [], "label": "text", "prov": [{"page_no": 5, "bbox": {"l": 322.19424, "t": 693.65894, "r": 326.01498, "b": 687.74786, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "A", "text": "A"}, {"self_ref": "#/texts/425", "parent": {"cref": "#/pictures/3"}, "children": [], "label": "text", "prov": [{"page_no": 5, "bbox": {"l": 322.19424, "t": 605.00897, "r": 326.01498, "b": 599.09796, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "B", "text": "B"}, {"self_ref": "#/texts/426", "parent": {"cref": "#/pictures/3"}, "children": [], "label": "text", "prov": [{"page_no": 5, "bbox": {"l": 322.19424, "t": 538.45807, "r": 326.01498, "b": 532.547, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "C", "text": "C"}, {"self_ref": "#/texts/427", "parent": {"cref": "#/pictures/3"}, "children": [], "label": "text", "prov": [{"page_no": 5, "bbox": {"l": 322.19424, "t": 424.91504000000003, "r": 326.01498, "b": 419.004, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "D", "text": "D"}, {"self_ref": "#/texts/428", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 5, "bbox": {"l": 400.12841796875, "t": 333.5567321777344, "r": 476.331787109375, "b": 331.43994140625, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 64]}], "orig": "05237a14f2524e3f53c8454b074409d05078038a6a36b770fcc8ec7e540deae0", "text": "05237a14f2524e3f53c8454b074409d05078038a6a36b770fcc8ec7e540deae0"}, {"self_ref": "#/texts/429", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 5, "bbox": {"l": 317.62298583984375, "t": 266.5024719238281, "r": 558.204345703125, "b": 247.1688232421875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 123]}], "orig": "were carried out over a timeframe of 12 weeks, after which 8 of the 40 initially allocated annotators did not pass the bar.", "text": "were carried out over a timeframe of 12 weeks, after which 8 of the 40 initially allocated annotators did not pass the bar."}, {"self_ref": "#/texts/430", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 5, "bbox": {"l": 317.62298583984375, "t": 244.7010040283203, "r": 559.7149047851562, "b": 82.78482818603516, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 987]}], "orig": "Phase 4: Production annotation. The previously selected 80K pages were annotated with the defined 11 class labels by 32 annotators. This production phase took around three months to complete. All annotations were created online through CCS, which visualises the programmatic PDF text-cells as an overlay on the page. The page annotation are obtained by drawing rectangular bounding-boxes, as shown in Figure 3. With regard to the annotation practices, we implemented a few constraints and capabilities on the tooling level. First, we only allow non-overlapping, vertically oriented, rectangular boxes. For the large majority of documents, this constraint was sufficient and it speeds up the annotation considerably in comparison with arbitrary segmentation shapes. Second, annotator staff were not able to see each other's annotations. This was enforced by design to avoid any bias in the annotation, which could skew the numbers of the inter-annotator agreement (see Table 1). We wanted", "text": "Phase 4: Production annotation. The previously selected 80K pages were annotated with the defined 11 class labels by 32 annotators. This production phase took around three months to complete. All annotations were created online through CCS, which visualises the programmatic PDF text-cells as an overlay on the page. The page annotation are obtained by drawing rectangular bounding-boxes, as shown in Figure 3. With regard to the annotation practices, we implemented a few constraints and capabilities on the tooling level. First, we only allow non-overlapping, vertically oriented, rectangular boxes. For the large majority of documents, this constraint was sufficient and it speeds up the annotation considerably in comparison with arbitrary segmentation shapes. Second, annotator staff were not able to see each other's annotations. This was enforced by design to avoid any bias in the annotation, which could skew the numbers of the inter-annotator agreement (see Table 1). We wanted"}, {"self_ref": "#/texts/431", "parent": {"cref": "#/body"}, "children": [], "label": "page_header", "prov": [{"page_no": 6, "bbox": {"l": 53.79800033569336, "t": 731.6909790039062, "r": 558.202880859375, "b": 723.4239501953125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 130]}], "orig": "KDD \u201922, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", "text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar"}, {"self_ref": "#/texts/432", "parent": {"cref": "#/body"}, "children": [], "label": "caption", "prov": [{"page_no": 6, "bbox": {"l": 53.50199890136719, "t": 705.1270751953125, "r": 295.64874267578125, "b": 608.98291015625, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 489]}], "orig": "Table 2: Prediction performance (mAP@0.5-0.95) of object detection networks on DocLayNet test set. The MRCNN (Mask R-CNN) and FRCNN (Faster R-CNN) models with ResNet-50 or ResNet-101 backbone were trained based on the network architectures from the detectron2 model zoo (Mask R-CNN R50, R101-FPN 3x, Faster R-CNN R101-FPN 3x), with default configurations. The YOLO implementation utilized was YOLOv5x6 [13]. All models were initialised using pre-trained weights from the COCO 2017 dataset.", "text": "Table 2: Prediction performance (mAP@0.5-0.95) of object detection networks on DocLayNet test set. The MRCNN (Mask R-CNN) and FRCNN (Faster R-CNN) models with ResNet-50 or ResNet-101 backbone were trained based on the network architectures from the detectron2 model zoo (Mask R-CNN R50, R101-FPN 3x, Faster R-CNN R101-FPN 3x), with default configurations. The YOLO implementation utilized was YOLOv5x6 [13]. All models were initialised using pre-trained weights from the COCO 2017 dataset."}, {"self_ref": "#/texts/433", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 53.52899932861328, "t": 421.07244873046875, "r": 295.5561218261719, "b": 215.43682861328125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1252]}], "orig": "to avoid this at any cost in order to have clear, unbiased baseline numbers for human document-layout annotation. Third, we introduced the feature of snapping boxes around text segments to obtain a pixel-accurate annotation and again reduce time and effort. The CCS annotation tool automatically shrinks every user-drawn box to the minimum bounding-box around the enclosed text-cells for all purely text-based segments, which excludes only Table and Picture . For the latter, we instructed annotation staff to minimise inclusion of surrounding whitespace while including all graphical lines. A downside of snapping boxes to enclosed text cells is that some wrongly parsed PDF pages cannot be annotated correctly and need to be skipped. Fourth, we established a way to flag pages as rejected for cases where no valid annotation according to the label guidelines could be achieved. Example cases for this would be PDF pages that render incorrectly or contain layouts that are impossible to capture with non-overlapping rectangles. Such rejected pages are not contained in the final dataset. With all these measures in place, experienced annotation staff managed to annotate a single page in a typical timeframe of 20s to 60s, depending on its complexity.", "text": "to avoid this at any cost in order to have clear, unbiased baseline numbers for human document-layout annotation. Third, we introduced the feature of snapping boxes around text segments to obtain a pixel-accurate annotation and again reduce time and effort. The CCS annotation tool automatically shrinks every user-drawn box to the minimum bounding-box around the enclosed text-cells for all purely text-based segments, which excludes only Table and Picture . For the latter, we instructed annotation staff to minimise inclusion of surrounding whitespace while including all graphical lines. A downside of snapping boxes to enclosed text cells is that some wrongly parsed PDF pages cannot be annotated correctly and need to be skipped. Fourth, we established a way to flag pages as rejected for cases where no valid annotation according to the label guidelines could be achieved. Example cases for this would be PDF pages that render incorrectly or contain layouts that are impossible to capture with non-overlapping rectangles. Such rejected pages are not contained in the final dataset. With all these measures in place, experienced annotation staff managed to annotate a single page in a typical timeframe of 20s to 60s, depending on its complexity."}, {"self_ref": "#/texts/434", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 6, "bbox": {"l": 53.79800033569336, "t": 203.87008666992188, "r": 147.4853515625, "b": 193.5609893798828, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 13]}], "orig": "5 EXPERIMENTS", "text": "5 EXPERIMENTS", "level": 1}, {"self_ref": "#/texts/435", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 53.48400115966797, "t": 178.74644470214844, "r": 295.4281005859375, "b": 82.7008285522461, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 584]}], "orig": "The primary goal of DocLayNet is to obtain high-quality ML models capable of accurate document-layout analysis on a wide variety of challenging layouts. As discussed in Section 2, object detection models are currently the easiest to use, due to the standardisation of ground-truth data in COCO format [16] and the availability of general frameworks such as detectron2 [17]. Furthermore, baseline numbers in PubLayNet and DocBank were obtained using standard object detection models such as Mask R-CNN and Faster R-CNN. As such, we will relate to these object detection methods in this", "text": "The primary goal of DocLayNet is to obtain high-quality ML models capable of accurate document-layout analysis on a wide variety of challenging layouts. As discussed in Section 2, object detection models are currently the easiest to use, due to the standardisation of ground-truth data in COCO format [16] and the availability of general frameworks such as detectron2 [17]. Furthermore, baseline numbers in PubLayNet and DocBank were obtained using standard object detection models such as Mask R-CNN and Faster R-CNN. As such, we will relate to these object detection methods in this"}, {"self_ref": "#/texts/436", "parent": {"cref": "#/body"}, "children": [], "label": "caption", "prov": [{"page_no": 6, "bbox": {"l": 317.9549865722656, "t": 512.9840087890625, "r": 559.8057861328125, "b": 449.7158203125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 329]}], "orig": "Figure 5: Prediction performance (mAP@0.5-0.95) of a Mask R-CNN network with ResNet50 backbone trained on increasing fractions of the DocLayNet dataset. The learning curve flattens around the 80% mark, indicating that increasing the size of the DocLayNet dataset with similar data will not yield significantly better predictions.", "text": "Figure 5: Prediction performance (mAP@0.5-0.95) of a Mask R-CNN network with ResNet50 backbone trained on increasing fractions of the DocLayNet dataset. The learning curve flattens around the 80% mark, indicating that increasing the size of the DocLayNet dataset with similar data will not yield significantly better predictions."}, {"self_ref": "#/texts/437", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 349.16577, "t": 545.31982, "r": 352.48175, "b": 539.24573, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "0", "text": "0"}, {"self_ref": "#/texts/438", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 385.93698, "t": 545.31982, "r": 392.56894, "b": 539.24573, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "20", "text": "20"}, {"self_ref": "#/texts/439", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 424.366, "t": 545.31982, "r": 430.99796, "b": 539.24573, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "40", "text": "40"}, {"self_ref": "#/texts/440", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 462.79504000000003, "t": 545.31982, "r": 469.427, "b": 539.24573, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "60", "text": "60"}, {"self_ref": "#/texts/441", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 501.22406, "t": 545.31982, "r": 507.85602, "b": 539.24573, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "80", "text": "80"}, {"self_ref": "#/texts/442", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 537.99524, "t": 545.31982, "r": 547.94318, "b": 539.24573, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 3]}], "orig": "100", "text": "100"}, {"self_ref": "#/texts/443", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 410.28143, "t": 538.19159, "r": 483.47278000000006, "b": 532.11749, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 27]}], "orig": "% of DocLayNet training set", "text": "% of DocLayNet training set"}, {"self_ref": "#/texts/444", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 330.93539, "t": 573.61536, "r": 337.56735, "b": 567.54126, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "50", "text": "50"}, {"self_ref": "#/texts/445", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 330.93539, "t": 599.91339, "r": 337.56735, "b": 593.83929, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "55", "text": "55"}, {"self_ref": "#/texts/446", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 330.93539, "t": 626.21136, "r": 337.56735, "b": 620.13727, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "60", "text": "60"}, {"self_ref": "#/texts/447", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 330.93539, "t": 652.5094, "r": 337.56735, "b": 646.4353, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "65", "text": "65"}, {"self_ref": "#/texts/448", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 330.93539, "t": 678.80737, "r": 337.56735, "b": 672.73328, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "70", "text": "70"}, {"self_ref": "#/texts/449", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 322.92276, "t": 643.62311, "r": 328.99686, "b": 605.20782, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 13]}], "orig": "mAP 0.50:0.95", "text": "mAP 0.50:0.95"}, {"self_ref": "#/texts/450", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 470.97235, "t": 556.63324, "r": 477.6055, "b": 550.55914, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "10", "text": "10"}, {"self_ref": "#/texts/451", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 477.65662, "t": 557.17609, "r": 479.97778000000005, "b": 552.92419, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "1", "text": "1"}, {"self_ref": "#/texts/452", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 531.55127, "t": 556.58765, "r": 538.18445, "b": 550.51355, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "10", "text": "10"}, {"self_ref": "#/texts/453", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 538.23553, "t": 557.13049, "r": 540.5567, "b": 552.8786, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "2", "text": "2"}, {"self_ref": "#/texts/454", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 404.91125, "t": 575.99994, "r": 411.54321, "b": 569.92584, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "50", "text": "50"}, {"self_ref": "#/texts/455", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 404.91125, "t": 591.77875, "r": 411.54321, "b": 585.70465, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "55", "text": "55"}, {"self_ref": "#/texts/456", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 404.91125, "t": 607.55756, "r": 411.54321, "b": 601.48346, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "60", "text": "60"}, {"self_ref": "#/texts/457", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 404.91125, "t": 623.33636, "r": 411.54321, "b": 617.26227, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "65", "text": "65"}, {"self_ref": "#/texts/458", "parent": {"cref": "#/pictures/4"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 404.91125, "t": 639.11511, "r": 411.54321, "b": 633.04102, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 2]}], "orig": "70", "text": "70"}, {"self_ref": "#/texts/459", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 317.9549865722656, "t": 407.98846435546875, "r": 558.2041625976562, "b": 388.6548156738281, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 102]}], "orig": "paper and leave the detailed evaluation of more recent methods mentioned in Section 2 for future work.", "text": "paper and leave the detailed evaluation of more recent methods mentioned in Section 2 for future work."}, {"self_ref": "#/texts/460", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 317.6409912109375, "t": 386.0704650878906, "r": 558.4364013671875, "b": 311.9428405761719, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 397]}], "orig": "In this section, we will present several aspects related to the performance of object detection models on DocLayNet. Similarly as in PubLayNet, we will evaluate the quality of their predictions using mean average precision (mAP) with 10 overlaps that range from 0.5 to 0.95 in steps of 0.05 (mAP@0.5-0.95). These scores are computed by leveraging the evaluation code provided by the COCO API [16].", "text": "In this section, we will present several aspects related to the performance of object detection models on DocLayNet. Similarly as in PubLayNet, we will evaluate the quality of their predictions using mean average precision (mAP) with 10 overlaps that range from 0.5 to 0.95 in steps of 0.05 (mAP@0.5-0.95). These scores are computed by leveraging the evaluation code provided by the COCO API [16]."}, {"self_ref": "#/texts/461", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 6, "bbox": {"l": 317.9549865722656, "t": 295.1781005859375, "r": 466.8532409667969, "b": 284.8690185546875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 30]}], "orig": "Baselines for Object Detection", "text": "Baselines for Object Detection", "level": 1}, {"self_ref": "#/texts/462", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 6, "bbox": {"l": 317.7489929199219, "t": 279.9754638671875, "r": 558.4308471679688, "b": 85.2998275756836, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1146]}], "orig": "In Table 2, we present baseline experiments (given in mAP) on Mask R-CNN [12], Faster R-CNN [11], and YOLOv5 [13]. Both training and evaluation were performed on RGB images with dimensions of 1025 \u00d7 1025 pixels. For training, we only used one annotation in case of redundantly annotated pages. As one can observe, the variation in mAP between the models is rather low, but overall between 6 and 10% lower than the mAP computed from the pairwise human annotations on triple-annotated pages. This gives a good indication that the DocLayNet dataset poses a worthwhile challenge for the research community to close the gap between human recognition and ML approaches. It is interesting to see that Mask R-CNN and Faster R-CNN produce very comparable mAP scores, indicating that pixel-based image segmentation derived from bounding-boxes does not help to obtain better predictions. On the other hand, the more recent Yolov5x model does very well and even out-performs humans on selected labels such as Text , Table and Picture . This is not entirely surprising, as Text , Table and Picture are abundant and the most visually distinctive in a document.", "text": "In Table 2, we present baseline experiments (given in mAP) on Mask R-CNN [12], Faster R-CNN [11], and YOLOv5 [13]. Both training and evaluation were performed on RGB images with dimensions of 1025 \u00d7 1025 pixels. For training, we only used one annotation in case of redundantly annotated pages. As one can observe, the variation in mAP between the models is rather low, but overall between 6 and 10% lower than the mAP computed from the pairwise human annotations on triple-annotated pages. This gives a good indication that the DocLayNet dataset poses a worthwhile challenge for the research community to close the gap between human recognition and ML approaches. It is interesting to see that Mask R-CNN and Faster R-CNN produce very comparable mAP scores, indicating that pixel-based image segmentation derived from bounding-boxes does not help to obtain better predictions. On the other hand, the more recent Yolov5x model does very well and even out-performs humans on selected labels such as Text , Table and Picture . This is not entirely surprising, as Text , Table and Picture are abundant and the most visually distinctive in a document."}, {"self_ref": "#/texts/463", "parent": {"cref": "#/body"}, "children": [], "label": "page_header", "prov": [{"page_no": 7, "bbox": {"l": 53.79800033569336, "t": 731.6909790039062, "r": 347.0172424316406, "b": 723.4239501953125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 71]}], "orig": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis", "text": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis"}, {"self_ref": "#/texts/464", "parent": {"cref": "#/body"}, "children": [], "label": "page_header", "prov": [{"page_no": 7, "bbox": {"l": 365.75701904296875, "t": 731.6909790039062, "r": 558.2028198242188, "b": 723.4239501953125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 48]}], "orig": "KDD \u201922, August 14-18, 2022, Washington, DC, USA", "text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA"}, {"self_ref": "#/texts/465", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 7, "bbox": {"l": 53.50199890136719, "t": 705.1270751953125, "r": 295.6486511230469, "b": 663.77685546875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 205]}], "orig": "Table 3: Performance of a Mask R-CNN R50 network in mAP@0.5-0.95 scores trained on DocLayNet with different class label sets. The reduced label sets were obtained by either down-mapping or dropping labels.", "text": "Table 3: Performance of a Mask R-CNN R50 network in mAP@0.5-0.95 scores trained on DocLayNet with different class label sets. The reduced label sets were obtained by either down-mapping or dropping labels."}, {"self_ref": "#/texts/466", "parent": {"cref": "#/body"}, "children": [], "label": "caption", "prov": [{"page_no": 7, "bbox": {"l": 317.65899658203125, "t": 705.1270141601562, "r": 559.8068237304688, "b": 663.7767944335938, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 189]}], "orig": "Table 4: Performance of a Mask R-CNN R50 network with document-wise and page-wise split for different label sets. Naive page-wise split will result in GLYPH 10% point improvement.", "text": "Table 4: Performance of a Mask R-CNN R50 network with document-wise and page-wise split for different label sets. Naive page-wise split will result in GLYPH 10% point improvement."}, {"self_ref": "#/texts/467", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 7, "bbox": {"l": 53.79800033569336, "t": 472.4300842285156, "r": 131.05624389648438, "b": 462.1210021972656, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 14]}], "orig": "Learning Curve", "text": "Learning Curve", "level": 1}, {"self_ref": "#/texts/468", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 7, "bbox": {"l": 52.78499984741211, "t": 457.22845458984375, "r": 295.558349609375, "b": 262.55181884765625, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1157]}], "orig": "One of the fundamental questions related to any dataset is if it is \"large enough\". To answer this question for DocLayNet, we performed a data ablation study in which we evaluated a Mask R-CNN model trained on increasing fractions of the DocLayNet dataset. As can be seen in Figure 5, the mAP score rises sharply in the beginning and eventually levels out. To estimate the error-bar on the metrics, we ran the training five times on the entire data-set. This resulted in a 1% error-bar, depicted by the shaded area in Figure 5. In the inset of Figure 5, we show the exact same data-points, but with a logarithmic scale on the x-axis. As is expected, the mAP score increases linearly as a function of the data-size in the inset. The curve ultimately flattens out between the 80% and 100% mark, with the 80% mark falling within the error-bars of the 100% mark. This provides a good indication that the model would not improve significantly by yet increasing the data size. Rather, it would probably benefit more from improved data consistency (as discussed in Section 3), data augmentation methods [23], or the addition of more document categories and styles.", "text": "One of the fundamental questions related to any dataset is if it is \"large enough\". To answer this question for DocLayNet, we performed a data ablation study in which we evaluated a Mask R-CNN model trained on increasing fractions of the DocLayNet dataset. As can be seen in Figure 5, the mAP score rises sharply in the beginning and eventually levels out. To estimate the error-bar on the metrics, we ran the training five times on the entire data-set. This resulted in a 1% error-bar, depicted by the shaded area in Figure 5. In the inset of Figure 5, we show the exact same data-points, but with a logarithmic scale on the x-axis. As is expected, the mAP score increases linearly as a function of the data-size in the inset. The curve ultimately flattens out between the 80% and 100% mark, with the 80% mark falling within the error-bars of the 100% mark. This provides a good indication that the model would not improve significantly by yet increasing the data size. Rather, it would probably benefit more from improved data consistency (as discussed in Section 3), data augmentation methods [23], or the addition of more document categories and styles."}, {"self_ref": "#/texts/469", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 7, "bbox": {"l": 53.79800033569336, "t": 249.49008178710938, "r": 164.3289794921875, "b": 239.1809844970703, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 22]}], "orig": "Impact of Class Labels", "text": "Impact of Class Labels", "level": 1}, {"self_ref": "#/texts/470", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 7, "bbox": {"l": 53.46699905395508, "t": 234.2884521484375, "r": 295.5567932128906, "b": 83.44783020019531, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 910]}], "orig": "The choice and number of labels can have a significant effect on the overall model performance. Since PubLayNet, DocBank and DocLayNet all have different label sets, it is of particular interest to understand and quantify this influence of the label set on the model performance. We investigate this by either down-mapping labels into more common ones (e.g. Caption \u2192 Text ) or excluding them from the annotations entirely. Furthermore, it must be stressed that all mappings and exclusions were performed on the data before model training. In Table 3, we present the mAP scores for a Mask R-CNN R50 network on different label sets. Where a label is down-mapped, we show its corresponding label, otherwise it was excluded. We present three different label sets, with 6, 5 and 4 different labels respectively. The set of 5 labels contains the same labels as PubLayNet. However, due to the different definition of", "text": "The choice and number of labels can have a significant effect on the overall model performance. Since PubLayNet, DocBank and DocLayNet all have different label sets, it is of particular interest to understand and quantify this influence of the label set on the model performance. We investigate this by either down-mapping labels into more common ones (e.g. Caption \u2192 Text ) or excluding them from the annotations entirely. Furthermore, it must be stressed that all mappings and exclusions were performed on the data before model training. In Table 3, we present the mAP scores for a Mask R-CNN R50 network on different label sets. Where a label is down-mapped, we show its corresponding label, otherwise it was excluded. We present three different label sets, with 6, 5 and 4 different labels respectively. The set of 5 labels contains the same labels as PubLayNet. However, due to the different definition of"}, {"self_ref": "#/texts/471", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 7, "bbox": {"l": 317.6860046386719, "t": 460.5964660644531, "r": 559.5849609375, "b": 375.50982666015625, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 469]}], "orig": "lists in PubLayNet (grouped list-items) versus DocLayNet (separate list-items), the label set of size 4 is the closest to PubLayNet, in the assumption that the List is down-mapped to Text in PubLayNet. The results in Table 3 show that the prediction accuracy on the remaining class labels does not change significantly when other classes are merged into them. The overall macro-average improves by around 5%, in particular when Page-footer and Page-header are excluded.", "text": "lists in PubLayNet (grouped list-items) versus DocLayNet (separate list-items), the label set of size 4 is the closest to PubLayNet, in the assumption that the List is down-mapped to Text in PubLayNet. The results in Table 3 show that the prediction accuracy on the remaining class labels does not change significantly when other classes are merged into them. The overall macro-average improves by around 5%, in particular when Page-footer and Page-header are excluded."}, {"self_ref": "#/texts/472", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 7, "bbox": {"l": 317.9549560546875, "t": 362.6051025390625, "r": 549.860595703125, "b": 352.2960205078125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 46]}], "orig": "Impact of Document Split in Train and Test Set", "text": "Impact of Document Split in Train and Test Set", "level": 1}, {"self_ref": "#/texts/473", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 7, "bbox": {"l": 317.62298583984375, "t": 347.4034729003906, "r": 559.7138061523438, "b": 196.5628204345703, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 852]}], "orig": "Many documents in DocLayNet have a unique styling. In order to avoid overfitting on a particular style, we have split the train-, test- and validation-sets of DocLayNet on document boundaries, i.e. every document contributes pages to only one set. To the best of our knowledge, this was not considered in PubLayNet or DocBank. To quantify how this affects model performance, we trained and evaluated a Mask R-CNN R50 model on a modified dataset version. Here, the train-, test- and validation-sets were obtained by a randomised draw over the individual pages. As can be seen in Table 4, the difference in model performance is surprisingly large: pagewise splitting gains \u02dc 10% in mAP over the document-wise splitting. Thus, random page-wise splitting of DocLayNet can easily lead to accidental overestimation of model performance and should be avoided.", "text": "Many documents in DocLayNet have a unique styling. In order to avoid overfitting on a particular style, we have split the train-, test- and validation-sets of DocLayNet on document boundaries, i.e. every document contributes pages to only one set. To the best of our knowledge, this was not considered in PubLayNet or DocBank. To quantify how this affects model performance, we trained and evaluated a Mask R-CNN R50 model on a modified dataset version. Here, the train-, test- and validation-sets were obtained by a randomised draw over the individual pages. As can be seen in Table 4, the difference in model performance is surprisingly large: pagewise splitting gains \u02dc 10% in mAP over the document-wise splitting. Thus, random page-wise splitting of DocLayNet can easily lead to accidental overestimation of model performance and should be avoided."}, {"self_ref": "#/texts/474", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 7, "bbox": {"l": 317.9549865722656, "t": 183.6580810546875, "r": 418.5477600097656, "b": 173.34898376464844, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 18]}], "orig": "Dataset Comparison", "text": "Dataset Comparison", "level": 1}, {"self_ref": "#/texts/475", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 7, "bbox": {"l": 317.6860046386719, "t": 168.45645141601562, "r": 559.1881713867188, "b": 83.35986328125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 521]}], "orig": "Throughout this paper, we claim that DocLayNet's wider variety of document layouts leads to more robust layout detection models. In Table 5, we provide evidence for that. We trained models on each of the available datasets (PubLayNet, DocBank and DocLayNet) and evaluated them on the test sets of the other datasets. Due to the different label sets and annotation styles, a direct comparison is not possible. Hence, we focussed on the common labels among the datasets. Between PubLayNet and DocLayNet, these are Picture ,", "text": "Throughout this paper, we claim that DocLayNet's wider variety of document layouts leads to more robust layout detection models. In Table 5, we provide evidence for that. We trained models on each of the available datasets (PubLayNet, DocBank and DocLayNet) and evaluated them on the test sets of the other datasets. Due to the different label sets and annotation styles, a direct comparison is not possible. Hence, we focussed on the common labels among the datasets. Between PubLayNet and DocLayNet, these are Picture ,"}, {"self_ref": "#/texts/476", "parent": {"cref": "#/body"}, "children": [], "label": "page_header", "prov": [{"page_no": 8, "bbox": {"l": 53.79800033569336, "t": 731.6909790039062, "r": 558.202880859375, "b": 723.4239501953125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 130]}], "orig": "KDD \u201922, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", "text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar"}, {"self_ref": "#/texts/477", "parent": {"cref": "#/body"}, "children": [], "label": "caption", "prov": [{"page_no": 8, "bbox": {"l": 53.50199890136719, "t": 705.1270751953125, "r": 295.648681640625, "b": 641.85888671875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 298]}], "orig": "Table 5: Prediction Performance (mAP@0.5-0.95) of a Mask R-CNN R50 network across the PubLayNet, DocBank & DocLayNet data-sets. By evaluating on common label classes of each dataset, we observe that the DocLayNet-trained model has much less pronounced variations in performance across all datasets.", "text": "Table 5: Prediction Performance (mAP@0.5-0.95) of a Mask R-CNN R50 network across the PubLayNet, DocBank & DocLayNet data-sets. By evaluating on common label classes of each dataset, we observe that the DocLayNet-trained model has much less pronounced variations in performance across all datasets."}, {"self_ref": "#/texts/478", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 8, "bbox": {"l": 53.79800033569336, "t": 401.0794677734375, "r": 294.047119140625, "b": 348.85986328125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 295]}], "orig": "Section-header , Table and Text . Before training, we either mapped or excluded DocLayNet's other labels as specified in table 3, and also PubLayNet's List to Text . Note that the different clustering of lists (by list-element vs. whole list objects) naturally decreases the mAP score for Text .", "text": "Section-header , Table and Text . Before training, we either mapped or excluded DocLayNet's other labels as specified in table 3, and also PubLayNet's List to Text . Note that the different clustering of lists (by list-element vs. whole list objects) naturally decreases the mAP score for Text ."}, {"self_ref": "#/texts/479", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 8, "bbox": {"l": 53.46699905395508, "t": 346.28546142578125, "r": 295.55908203125, "b": 206.40382385253906, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 793]}], "orig": "For comparison of DocBank with DocLayNet, we trained only on Picture and Table clusters of each dataset. We had to exclude Text because successive paragraphs are often grouped together into a single object in DocBank. This paragraph grouping is incompatible with the individual paragraphs of DocLayNet. As can be seen in Table 5, DocLayNet trained models yield better performance compared to the previous datasets. It is noteworthy that the models trained on PubLayNet and DocBank perform very well on their own test set, but have a much lower performance on the foreign datasets. While this also applies to DocLayNet, the difference is far less pronounced. Thus we conclude that DocLayNet trained models are overall more robust and will produce better results for challenging, unseen layouts.", "text": "For comparison of DocBank with DocLayNet, we trained only on Picture and Table clusters of each dataset. We had to exclude Text because successive paragraphs are often grouped together into a single object in DocBank. This paragraph grouping is incompatible with the individual paragraphs of DocLayNet. As can be seen in Table 5, DocLayNet trained models yield better performance compared to the previous datasets. It is noteworthy that the models trained on PubLayNet and DocBank perform very well on their own test set, but have a much lower performance on the foreign datasets. While this also applies to DocLayNet, the difference is far less pronounced. Thus we conclude that DocLayNet trained models are overall more robust and will produce better results for challenging, unseen layouts."}, {"self_ref": "#/texts/480", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 8, "bbox": {"l": 53.79800033569336, "t": 186.9390869140625, "r": 156.00534057617188, "b": 176.62998962402344, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 19]}], "orig": "Example Predictions", "text": "Example Predictions", "level": 1}, {"self_ref": "#/texts/481", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 8, "bbox": {"l": 53.52899932861328, "t": 171.7364501953125, "r": 295.5584411621094, "b": 86.64982604980469, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 481]}], "orig": "To conclude this section, we illustrate the quality of layout predictions one can expect from DocLayNet-trained models by providing a selection of examples without any further post-processing applied. Figure 6 shows selected layout predictions on pages from the test-set of DocLayNet. Results look decent in general across document categories, however one can also observe mistakes such as overlapping clusters of different classes, or entirely missing boxes due to low confidence.", "text": "To conclude this section, we illustrate the quality of layout predictions one can expect from DocLayNet-trained models by providing a selection of examples without any further post-processing applied. Figure 6 shows selected layout predictions on pages from the test-set of DocLayNet. Results look decent in general across document categories, however one can also observe mistakes such as overlapping clusters of different classes, or entirely missing boxes due to low confidence."}, {"self_ref": "#/texts/482", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 8, "bbox": {"l": 317.95501708984375, "t": 706.14013671875, "r": 405.7296142578125, "b": 695.8309936523438, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 12]}], "orig": "6 CONCLUSION", "text": "6 CONCLUSION", "level": 1}, {"self_ref": "#/texts/483", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 8, "bbox": {"l": 317.9549865722656, "t": 690.9384765625, "r": 559.7137451171875, "b": 605.850830078125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 507]}], "orig": "In this paper, we presented the DocLayNet dataset. It provides the document conversion and layout analysis research community a new and challenging dataset to improve and fine-tune novel ML methods on. In contrast to many other datasets, DocLayNet was created by human annotation in order to obtain reliable layout ground-truth on a wide variety of publication- and typesettingstyles. Including a large proportion of documents outside the scientific publishing domain adds significant value in this respect.", "text": "In this paper, we presented the DocLayNet dataset. It provides the document conversion and layout analysis research community a new and challenging dataset to improve and fine-tune novel ML methods on. In contrast to many other datasets, DocLayNet was created by human annotation in order to obtain reliable layout ground-truth on a wide variety of publication- and typesettingstyles. Including a large proportion of documents outside the scientific publishing domain adds significant value in this respect."}, {"self_ref": "#/texts/484", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 8, "bbox": {"l": 317.6860046386719, "t": 603.2664794921875, "r": 559.717041015625, "b": 507.2208251953125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 573]}], "orig": "From the dataset, we have derived on the one hand reference metrics for human performance on document-layout annotation (through double and triple annotations) and on the other hand evaluated the baseline performance of commonly used object detection methods. We also illustrated the impact of various dataset-related aspects on model performance through data-ablation experiments, both from a size and class-label perspective. Last but not least, we compared the accuracy of models trained on other public datasets and showed that DocLayNet trained models are more robust.", "text": "From the dataset, we have derived on the one hand reference metrics for human performance on document-layout annotation (through double and triple annotations) and on the other hand evaluated the baseline performance of commonly used object detection methods. We also illustrated the impact of various dataset-related aspects on model performance through data-ablation experiments, both from a size and class-label perspective. Last but not least, we compared the accuracy of models trained on other public datasets and showed that DocLayNet trained models are more robust."}, {"self_ref": "#/texts/485", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 8, "bbox": {"l": 317.62298583984375, "t": 504.636474609375, "r": 558.4346923828125, "b": 474.3438415527344, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 188]}], "orig": "To date, there is still a significant gap between human and ML accuracy on the layout interpretation task, and we hope that this work will inspire the research community to close that gap.", "text": "To date, there is still a significant gap between human and ML accuracy on the layout interpretation task, and we hope that this work will inspire the research community to close that gap."}, {"self_ref": "#/texts/486", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 8, "bbox": {"l": 317.9549865722656, "t": 456.9081115722656, "r": 387.3695983886719, "b": 446.5990295410156, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 10]}], "orig": "REFERENCES", "text": "REFERENCES", "level": 1}, {"self_ref": "#/texts/487", "parent": {"cref": "#/groups/3"}, "children": [], "label": "list_item", "prov": [{"page_no": 8, "bbox": {"l": 321.197998046875, "t": 443.29766845703125, "r": 558.2009887695312, "b": 420.8371276855469, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 191]}], "orig": "[1] Max G\u00f6bel, Tamir Hassan, Ermelinda Oro, and Giorgio Orsi. Icdar 2013 table competition. In 2013 12th International Conference on Document Analysis and Recognition , pages 1449-1453, 2013.", "text": "[1] Max G\u00f6bel, Tamir Hassan, Ermelinda Oro, and Giorgio Orsi. Icdar 2013 table competition. In 2013 12th International Conference on Document Analysis and Recognition , pages 1449-1453, 2013.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/488", "parent": {"cref": "#/groups/3"}, "children": [], "label": "list_item", "prov": [{"page_no": 8, "bbox": {"l": 321.197998046875, "t": 419.38763427734375, "r": 559.3798217773438, "b": 388.9571228027344, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 279]}], "orig": "[2] Christian Clausner, Apostolos Antonacopoulos, and Stefan Pletschacher. Icdar2017 competition on recognition of documents with complex layouts rdcl2017. In 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR) , volume 01, pages 1404-1410, 2017.", "text": "[2] Christian Clausner, Apostolos Antonacopoulos, and Stefan Pletschacher. Icdar2017 competition on recognition of documents with complex layouts rdcl2017. In 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR) , volume 01, pages 1404-1410, 2017.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/489", "parent": {"cref": "#/groups/3"}, "children": [], "label": "list_item", "prov": [{"page_no": 8, "bbox": {"l": 321.197998046875, "t": 387.50762939453125, "r": 558.2001342773438, "b": 365.0531005859375, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 213]}], "orig": "[3] Herv\u00e9 D\u00e9jean, Jean-Luc Meunier, Liangcai Gao, Yilun Huang, Yu Fang, Florian Kleber, and Eva-Maria Lang. ICDAR 2019 Competition on Table Detection and Recognition (cTDaR), April 2019. http://sac.founderit.com/.", "text": "[3] Herv\u00e9 D\u00e9jean, Jean-Luc Meunier, Liangcai Gao, Yilun Huang, Yu Fang, Florian Kleber, and Eva-Maria Lang. ICDAR 2019 Competition on Table Detection and Recognition (cTDaR), April 2019. http://sac.founderit.com/.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/490", "parent": {"cref": "#/groups/3"}, "children": [], "label": "list_item", "prov": [{"page_no": 8, "bbox": {"l": 321.197998046875, "t": 363.5966491699219, "r": 559.3787231445312, "b": 333.173095703125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 251]}], "orig": "[4] Antonio Jimeno Yepes, Peter Zhong, and Douglas Burdick. Competition on scientific literature parsing. In Proceedings of the International Conference on Document Analysis and Recognition , ICDAR, pages 605-617. LNCS 12824, SpringerVerlag, sep 2021.", "text": "[4] Antonio Jimeno Yepes, Peter Zhong, and Douglas Burdick. Competition on scientific literature parsing. In Proceedings of the International Conference on Document Analysis and Recognition , ICDAR, pages 605-617. LNCS 12824, SpringerVerlag, sep 2021.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/491", "parent": {"cref": "#/groups/3"}, "children": [], "label": "list_item", "prov": [{"page_no": 8, "bbox": {"l": 321.197998046875, "t": 331.7166442871094, "r": 559.0262451171875, "b": 301.2920837402344, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 261]}], "orig": "[5] Logan Markewich, Hao Zhang, Yubin Xing, Navid Lambert-Shirzad, Jiang Zhexin, Roy Lee, Zhi Li, and Seok-Bum Ko. Segmentation for document layout analysis: not dead yet. International Journal on Document Analysis and Recognition (IJDAR) , pages 1-11, 01 2022.", "text": "[5] Logan Markewich, Hao Zhang, Yubin Xing, Navid Lambert-Shirzad, Jiang Zhexin, Roy Lee, Zhi Li, and Seok-Bum Ko. Segmentation for document layout analysis: not dead yet. International Journal on Document Analysis and Recognition (IJDAR) , pages 1-11, 01 2022.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/492", "parent": {"cref": "#/groups/3"}, "children": [], "label": "list_item", "prov": [{"page_no": 8, "bbox": {"l": 321.197998046875, "t": 299.83563232421875, "r": 558.20361328125, "b": 277.3751220703125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 235]}], "orig": "[6] Xu Zhong, Jianbin Tang, and Antonio Jimeno-Yepes. Publaynet: Largest dataset ever for document layout analysis. In Proceedings of the International Conference on Document Analysis and Recognition , ICDAR, pages 1015-1022, sep 2019.", "text": "[6] Xu Zhong, Jianbin Tang, and Antonio Jimeno-Yepes. Publaynet: Largest dataset ever for document layout analysis. In Proceedings of the International Conference on Document Analysis and Recognition , ICDAR, pages 1015-1022, sep 2019.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/493", "parent": {"cref": "#/groups/3"}, "children": [], "label": "list_item", "prov": [{"page_no": 8, "bbox": {"l": 321.1979675292969, "t": 275.9256286621094, "r": 558.9714965820312, "b": 237.53111267089844, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 316]}], "orig": "[7] Minghao Li, Yiheng Xu, Lei Cui, Shaohan Huang, Furu Wei, Zhoujun Li, and Ming Zhou. Docbank: A benchmark dataset for document layout analysis. In Proceedings of the 28th International Conference on Computational Linguistics , COLING, pages 949-960. International Committee on Computational Linguistics, dec 2020.", "text": "[7] Minghao Li, Yiheng Xu, Lei Cui, Shaohan Huang, Furu Wei, Zhoujun Li, and Ming Zhou. Docbank: A benchmark dataset for document layout analysis. In Proceedings of the 28th International Conference on Computational Linguistics , COLING, pages 949-960. International Committee on Computational Linguistics, dec 2020.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/494", "parent": {"cref": "#/groups/3"}, "children": [], "label": "list_item", "prov": [{"page_no": 8, "bbox": {"l": 321.197998046875, "t": 236.07464599609375, "r": 558.9022216796875, "b": 213.6141357421875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 172]}], "orig": "[8] Riaz Ahmad, Muhammad Tanvir Afzal, and M. Qadir. Information extraction from pdf sources based on rule-based system using integrated formats. In SemWebEval@ESWC , 2016.", "text": "[8] Riaz Ahmad, Muhammad Tanvir Afzal, and M. Qadir. Information extraction from pdf sources based on rule-based system using integrated formats. In SemWebEval@ESWC , 2016.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/495", "parent": {"cref": "#/groups/3"}, "children": [], "label": "list_item", "prov": [{"page_no": 8, "bbox": {"l": 321.197998046875, "t": 212.16464233398438, "r": 559.2744750976562, "b": 181.74110412597656, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 271]}], "orig": "[9] Ross B. Girshick, Jeff Donahue, Trevor Darrell, and Jitendra Malik. Rich feature hierarchies for accurate object detection and semantic segmentation. In IEEE Conference on Computer Vision and Pattern Recognition , CVPR, pages 580-587. IEEE Computer Society, jun 2014.", "text": "[9] Ross B. Girshick, Jeff Donahue, Trevor Darrell, and Jitendra Malik. Rich feature hierarchies for accurate object detection and semantic segmentation. In IEEE Conference on Computer Vision and Pattern Recognition , CVPR, pages 580-587. IEEE Computer Society, jun 2014.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/496", "parent": {"cref": "#/groups/3"}, "children": [], "label": "list_item", "prov": [{"page_no": 8, "bbox": {"l": 317.9549865722656, "t": 180.28463745117188, "r": 558.2020263671875, "b": 165.7931365966797, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 149]}], "orig": "[10] Ross B. Girshick. Fast R-CNN. In 2015 IEEE International Conference on Computer Vision , ICCV, pages 1440-1448. IEEE Computer Society, dec 2015.", "text": "[10] Ross B. Girshick. Fast R-CNN. In 2015 IEEE International Conference on Computer Vision , ICCV, pages 1440-1448. IEEE Computer Society, dec 2015.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/497", "parent": {"cref": "#/groups/3"}, "children": [], "label": "list_item", "prov": [{"page_no": 8, "bbox": {"l": 317.9549865722656, "t": 164.3436279296875, "r": 558.201416015625, "b": 141.8831329345703, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 227]}], "orig": "[11] Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. Faster r-cnn: Towards real-time object detection with region proposal networks. IEEE Transactions on Pattern Analysis and Machine Intelligence , 39(6):1137-1149, 2017.", "text": "[11] Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. Faster r-cnn: Towards real-time object detection with region proposal networks. IEEE Transactions on Pattern Analysis and Machine Intelligence , 39(6):1137-1149, 2017.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/498", "parent": {"cref": "#/groups/3"}, "children": [], "label": "list_item", "prov": [{"page_no": 8, "bbox": {"l": 317.9549865722656, "t": 140.43362426757812, "r": 559.278076171875, "b": 117.98011016845703, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 192]}], "orig": "[12] Kaiming He, Georgia Gkioxari, Piotr Doll\u00e1r, and Ross B. Girshick. Mask R-CNN. In IEEE International Conference on Computer Vision , ICCV, pages 2980-2988. IEEE Computer Society, Oct 2017.", "text": "[12] Kaiming He, Georgia Gkioxari, Piotr Doll\u00e1r, and Ross B. Girshick. Mask R-CNN. In IEEE International Conference on Computer Vision , ICCV, pages 2980-2988. IEEE Computer Society, Oct 2017.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/499", "parent": {"cref": "#/groups/3"}, "children": [], "label": "list_item", "prov": [{"page_no": 8, "bbox": {"l": 317.9549865722656, "t": 116.52364349365234, "r": 558.9715576171875, "b": 86.09910583496094, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 305]}], "orig": "[13] Glenn Jocher, Alex Stoken, Ayush Chaurasia, Jirka Borovec, NanoCode012, TaoXie, Yonghye Kwon, Kalen Michael, Liu Changyu, Jiacong Fang, Abhiram V, Laughing, tkianai, yxNONG, Piotr Skalski, Adam Hogan, Jebastin Nadar, imyhxy, Lorenzo Mammana, Alex Wang, Cristi Fati, Diego Montes, Jan Hajek, Laurentiu", "text": "[13] Glenn Jocher, Alex Stoken, Ayush Chaurasia, Jirka Borovec, NanoCode012, TaoXie, Yonghye Kwon, Kalen Michael, Liu Changyu, Jiacong Fang, Abhiram V, Laughing, tkianai, yxNONG, Piotr Skalski, Adam Hogan, Jebastin Nadar, imyhxy, Lorenzo Mammana, Alex Wang, Cristi Fati, Diego Montes, Jan Hajek, Laurentiu", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/500", "parent": {"cref": "#/body"}, "children": [], "label": "page_header", "prov": [{"page_no": 9, "bbox": {"l": 53.79800033569336, "t": 731.6909790039062, "r": 347.0172424316406, "b": 723.4239501953125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 71]}], "orig": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis", "text": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis"}, {"self_ref": "#/texts/501", "parent": {"cref": "#/body"}, "children": [], "label": "page_header", "prov": [{"page_no": 9, "bbox": {"l": 365.75701904296875, "t": 731.6909790039062, "r": 558.2028198242188, "b": 723.4239501953125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 48]}], "orig": "KDD \u201922, August 14-18, 2022, Washington, DC, USA", "text": "KDD \u201922, August 14-18, 2022, Washington, DC, USA"}, {"self_ref": "#/texts/502", "parent": {"cref": "#/body"}, "children": [], "label": "caption", "prov": [{"page_no": 9, "bbox": {"l": 62.323875427246094, "t": 349.7145690917969, "r": 318.5047302246094, "b": 343.73516845703125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 89]}], "orig": "Text Caption List-Item Formula Table Section-Header Picture Page-Header Page-Footer Title", "text": "Text Caption List-Item Formula Table Section-Header Picture Page-Header Page-Footer Title"}, {"self_ref": "#/texts/503", "parent": {"cref": "#/pictures/5"}, "children": [], "label": "text", "prov": [{"page_no": 9, "bbox": {"l": 231.8804, "t": 490.49457, "r": 235.14504999999997, "b": 377.30856, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 64]}], "orig": "4bed2a8aa51ac37058e79605821bbc426d032b0b6ca8bdf3409ed8508ccd8c67", "text": "4bed2a8aa51ac37058e79605821bbc426d032b0b6ca8bdf3409ed8508ccd8c67"}, {"self_ref": "#/texts/504", "parent": {"cref": "#/pictures/5"}, "children": [], "label": "text", "prov": [{"page_no": 9, "bbox": {"l": 395.06876, "t": 674.62817, "r": 398.33353, "b": 561.44214, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 64]}], "orig": "2f2a06d08f5ad565d0f5e815f4ddf666365b2cff435cdaeb8850217e8a8efabf", "text": "2f2a06d08f5ad565d0f5e815f4ddf666365b2cff435cdaeb8850217e8a8efabf"}, {"self_ref": "#/texts/505", "parent": {"cref": "#/pictures/5"}, "children": [], "label": "text", "prov": [{"page_no": 9, "bbox": {"l": 55.775887, "t": 490.49457, "r": 59.04052000000001, "b": 377.30856, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 64]}], "orig": "7f2fd7293e04bf4f1756ae51f5779764933da1d1d2002e3915356050570fc75b", "text": "7f2fd7293e04bf4f1756ae51f5779764933da1d1d2002e3915356050570fc75b"}, {"self_ref": "#/texts/506", "parent": {"cref": "#/pictures/5"}, "children": [], "label": "text", "prov": [{"page_no": 9, "bbox": {"l": 232.01364, "t": 674.62817, "r": 235.27841000000004, "b": 561.44214, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 64]}], "orig": "1b81cf65f47456ad4faa725d1eb09879bd633af16cfe2bf8cea661b87907bfac", "text": "1b81cf65f47456ad4faa725d1eb09879bd633af16cfe2bf8cea661b87907bfac"}, {"self_ref": "#/texts/507", "parent": {"cref": "#/pictures/5"}, "children": [], "label": "text", "prov": [{"page_no": 9, "bbox": {"l": 395.20047, "t": 490.49457, "r": 398.46512, "b": 377.30856, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 64]}], "orig": "b60da9d26f488cb133e47d101d35fda1bdca2671ade60764d1cd569590270327", "text": "b60da9d26f488cb133e47d101d35fda1bdca2671ade60764d1cd569590270327"}, {"self_ref": "#/texts/508", "parent": {"cref": "#/pictures/5"}, "children": [], "label": "text", "prov": [{"page_no": 9, "bbox": {"l": 55.775818, "t": 674.62817, "r": 65.409912, "b": 561.44214, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 70]}], "orig": "2b7b8355a42ebef0cf91583aad9f30f7c9fa63c5b05911730ba15275c024965b$^{A}$", "text": "2b7b8355a42ebef0cf91583aad9f30f7c9fa63c5b05911730ba15275c024965b$^{A}$"}, {"self_ref": "#/texts/509", "parent": {"cref": "#/pictures/5"}, "children": [], "label": "text", "prov": [{"page_no": 9, "bbox": {"l": 234.56980999999996, "t": 703.4981699999998, "r": 240.06987, "b": 694.9890100000001, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "B", "text": "B"}, {"self_ref": "#/texts/510", "parent": {"cref": "#/pictures/5"}, "children": [], "label": "text", "prov": [{"page_no": 9, "bbox": {"l": 397.81934, "t": 703.10645, "r": 403.3194, "b": 694.59729, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "C", "text": "C"}, {"self_ref": "#/texts/511", "parent": {"cref": "#/pictures/5"}, "children": [], "label": "text", "prov": [{"page_no": 9, "bbox": {"l": 59.909843, "t": 525.24115, "r": 65.409912, "b": 516.73206, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "D", "text": "D"}, {"self_ref": "#/texts/512", "parent": {"cref": "#/pictures/5"}, "children": [], "label": "text", "prov": [{"page_no": 9, "bbox": {"l": 234.77386, "t": 525.63293, "r": 239.85495000000003, "b": 517.12384, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "E", "text": "E"}, {"self_ref": "#/texts/513", "parent": {"cref": "#/pictures/5"}, "children": [], "label": "text", "prov": [{"page_no": 9, "bbox": {"l": 398.26144, "t": 525.24115, "r": 402.91592, "b": 516.73206, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "F", "text": "F"}, {"self_ref": "#/texts/514", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 9, "bbox": {"l": 53.79800033569336, "t": 327.51800537109375, "r": 559.807861328125, "b": 286.16876220703125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 386]}], "orig": "Figure 6: Example layout predictions on selected pages from the DocLayNet test-set. (A, D) exhibit favourable results on coloured backgrounds. (B, C) show accurate list-item and paragraph differentiation despite densely-spaced lines. (E) demonstrates good table and figure distinction. (F) shows predictions on a Chinese patent with multiple overlaps, label confusion and missing boxes.", "text": "Figure 6: Example layout predictions on selected pages from the DocLayNet test-set. (A, D) exhibit favourable results on coloured backgrounds. (B, C) show accurate list-item and paragraph differentiation despite densely-spaced lines. (E) demonstrates good table and figure distinction. (F) shows predictions on a Chinese patent with multiple overlaps, label confusion and missing boxes."}, {"self_ref": "#/texts/515", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 9, "bbox": {"l": 69.23400115966797, "t": 264.93365478515625, "r": 295.22406005859375, "b": 242.4801025390625, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 195]}], "orig": "Diaconu, Mai Thanh Minh, Marc, albinxavi, fatih, oleg, and wanghao yang. ultralytics/yolov5: v6.0 - yolov5n nano models, roboflow integration, tensorflow export, opencv dnn support, October 2021.", "text": "Diaconu, Mai Thanh Minh, Marc, albinxavi, fatih, oleg, and wanghao yang. ultralytics/yolov5: v6.0 - yolov5n nano models, roboflow integration, tensorflow export, opencv dnn support, October 2021."}, {"self_ref": "#/texts/516", "parent": {"cref": "#/groups/4"}, "children": [], "label": "list_item", "prov": [{"page_no": 9, "bbox": {"l": 53.79800033569336, "t": 241.02362060546875, "r": 295.12176513671875, "b": 218.56314086914062, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 190]}], "orig": "[14] Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, and Sergey Zagoruyko. End-to-end object detection with transformers. CoRR , abs/2005.12872, 2020.", "text": "[14] Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, and Sergey Zagoruyko. End-to-end object detection with transformers. CoRR , abs/2005.12872, 2020.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/517", "parent": {"cref": "#/groups/4"}, "children": [], "label": "list_item", "prov": [{"page_no": 9, "bbox": {"l": 53.79800033569336, "t": 217.1136474609375, "r": 294.042236328125, "b": 202.62213134765625, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 132]}], "orig": "[15] Mingxing Tan, Ruoming Pang, and Quoc V. Le. Efficientdet: Scalable and efficient object detection. CoRR , abs/1911.09070, 2019.", "text": "[15] Mingxing Tan, Ruoming Pang, and Quoc V. Le. Efficientdet: Scalable and efficient object detection. CoRR , abs/1911.09070, 2019.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/518", "parent": {"cref": "#/groups/4"}, "children": [], "label": "list_item", "prov": [{"page_no": 9, "bbox": {"l": 53.798004150390625, "t": 201.17263793945312, "r": 295.2226257324219, "b": 178.71910095214844, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 219]}], "orig": "[16] Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B. Girshick, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C. Lawrence Zitnick. Microsoft COCO: common objects in context, 2014.", "text": "[16] Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B. Girshick, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C. Lawrence Zitnick. Microsoft COCO: common objects in context, 2014.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/519", "parent": {"cref": "#/groups/4"}, "children": [], "label": "list_item", "prov": [{"page_no": 9, "bbox": {"l": 53.79800033569336, "t": 177.26263427734375, "r": 295.1200866699219, "b": 162.77911376953125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 100]}], "orig": "[17] Yuxin Wu, Alexander Kirillov, Francisco Massa, Wan-Yen Lo, and Ross Girshick. Detectron2, 2019.", "text": "[17] Yuxin Wu, Alexander Kirillov, Francisco Massa, Wan-Yen Lo, and Ross Girshick. Detectron2, 2019.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/520", "parent": {"cref": "#/groups/4"}, "children": [], "label": "list_item", "prov": [{"page_no": 9, "bbox": {"l": 53.79800033569336, "t": 161.3226318359375, "r": 294.80889892578125, "b": 122.92810821533203, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 339]}], "orig": "[18] Nikolaos Livathinos, Cesar Berrospi, Maksym Lysak, Viktor Kuropiatnyk, Ahmed Nassar, Andre Carvalho, Michele Dolfi, Christoph Auer, Kasper Dinkla, and Peter W. J. Staar. Robust pdf document conversion using recurrent neural networks. In Proceedings of the 35th Conference on Artificial Intelligence , AAAI, pages 1513715145, feb 2021.", "text": "[18] Nikolaos Livathinos, Cesar Berrospi, Maksym Lysak, Viktor Kuropiatnyk, Ahmed Nassar, Andre Carvalho, Michele Dolfi, Christoph Auer, Kasper Dinkla, and Peter W. J. Staar. Robust pdf document conversion using recurrent neural networks. In Proceedings of the 35th Conference on Artificial Intelligence , AAAI, pages 1513715145, feb 2021.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/521", "parent": {"cref": "#/groups/4"}, "children": [], "label": "list_item", "prov": [{"page_no": 9, "bbox": {"l": 53.797996520996094, "t": 121.47162628173828, "r": 295.22174072265625, "b": 83.07810974121094, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 336]}], "orig": "[19] Yiheng Xu, Minghao Li, Lei Cui, Shaohan Huang, Furu Wei, and Ming Zhou. Layoutlm: Pre-training of text and layout for document image understanding. In Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining , KDD, pages 1192-1200, New York, USA, 2020. Association for Computing Machinery.", "text": "[19] Yiheng Xu, Minghao Li, Lei Cui, Shaohan Huang, Furu Wei, and Ming Zhou. Layoutlm: Pre-training of text and layout for document image understanding. In Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining , KDD, pages 1192-1200, New York, USA, 2020. Association for Computing Machinery.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/522", "parent": {"cref": "#/groups/4"}, "children": [], "label": "list_item", "prov": [{"page_no": 9, "bbox": {"l": 317.9549865722656, "t": 264.9336242675781, "r": 559.0263671875, "b": 250.45010375976562, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 153]}], "orig": "[20] Shoubin Li, Xuyan Ma, Shuaiqun Pan, Jun Hu, Lin Shi, and Qing Wang. Vtlayout: Fusion of visual and text features for document layout analysis, 2021.", "text": "[20] Shoubin Li, Xuyan Ma, Shuaiqun Pan, Jun Hu, Lin Shi, and Qing Wang. Vtlayout: Fusion of visual and text features for document layout analysis, 2021.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/523", "parent": {"cref": "#/groups/4"}, "children": [], "label": "list_item", "prov": [{"page_no": 9, "bbox": {"l": 317.9549865722656, "t": 248.99362182617188, "r": 558.9714965820312, "b": 226.54010009765625, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 188]}], "orig": "[21] Peng Zhang, Can Li, Liang Qiao, Zhanzhan Cheng, Shiliang Pu, Yi Niu, and Fei Wu. Vsr: A unified framework for document layout analysis combining vision, semantics and relations, 2021.", "text": "[21] Peng Zhang, Can Li, Liang Qiao, Zhanzhan Cheng, Shiliang Pu, Yi Niu, and Fei Wu. Vsr: A unified framework for document layout analysis combining vision, semantics and relations, 2021.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/524", "parent": {"cref": "#/groups/4"}, "children": [], "label": "list_item", "prov": [{"page_no": 9, "bbox": {"l": 317.9549865722656, "t": 225.08364868164062, "r": 559.275390625, "b": 194.65213012695312, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 290]}], "orig": "[22] Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas. Corpus conversion service: A machine learning platform to ingest documents at scale. In Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining , KDD, pages 774-782. ACM, 2018.", "text": "[22] Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas. Corpus conversion service: A machine learning platform to ingest documents at scale. In Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining , KDD, pages 774-782. ACM, 2018.", "enumerated": false, "marker": "-"}, {"self_ref": "#/texts/525", "parent": {"cref": "#/groups/4"}, "children": [], "label": "list_item", "prov": [{"page_no": 9, "bbox": {"l": 317.9549865722656, "t": 193.20263671875, "r": 559.3782958984375, "b": 178.71212768554688, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 138]}], "orig": "[23] Connor Shorten and Taghi M. Khoshgoftaar. A survey on image data augmentation for deep learning. Journal of Big Data , 6(1):60, 2019.", "text": "[23] Connor Shorten and Taghi M. Khoshgoftaar. A survey on image data augmentation for deep learning. Journal of Big Data , 6(1):60, 2019.", "enumerated": false, "marker": "-"}], "pictures": [{"self_ref": "#/pictures/0", "parent": {"cref": "#/body"}, "children": [{"cref": "#/texts/17"}, {"cref": "#/texts/18"}, {"cref": "#/texts/19"}, {"cref": "#/texts/20"}, {"cref": "#/texts/21"}, {"cref": "#/texts/22"}, {"cref": "#/texts/23"}, {"cref": "#/texts/24"}, {"cref": "#/texts/25"}, {"cref": "#/texts/26"}, {"cref": "#/texts/27"}, {"cref": "#/texts/28"}, {"cref": "#/texts/29"}, {"cref": "#/texts/30"}, {"cref": "#/texts/31"}, {"cref": "#/texts/32"}, {"cref": "#/texts/33"}, {"cref": "#/texts/34"}, {"cref": "#/texts/35"}, {"cref": "#/texts/36"}, {"cref": "#/texts/37"}, {"cref": "#/texts/38"}, {"cref": "#/texts/39"}, {"cref": "#/texts/40"}, {"cref": "#/texts/41"}, {"cref": "#/texts/42"}, {"cref": "#/texts/43"}, {"cref": "#/texts/44"}, {"cref": "#/texts/45"}, {"cref": "#/texts/46"}, {"cref": "#/texts/47"}, {"cref": "#/texts/48"}, {"cref": "#/texts/49"}, {"cref": "#/texts/50"}, {"cref": "#/texts/51"}, {"cref": "#/texts/52"}, {"cref": "#/texts/53"}, {"cref": "#/texts/54"}, {"cref": "#/texts/55"}, {"cref": "#/texts/56"}, {"cref": "#/texts/57"}, {"cref": "#/texts/58"}, {"cref": "#/texts/59"}, {"cref": "#/texts/60"}, {"cref": "#/texts/61"}, {"cref": "#/texts/62"}, {"cref": "#/texts/63"}, {"cref": "#/texts/64"}, {"cref": "#/texts/65"}, {"cref": "#/texts/66"}, {"cref": "#/texts/67"}, {"cref": "#/texts/68"}, {"cref": "#/texts/69"}, {"cref": "#/texts/70"}, {"cref": "#/texts/71"}, {"cref": "#/texts/72"}, {"cref": "#/texts/73"}, {"cref": "#/texts/74"}, {"cref": "#/texts/75"}, {"cref": "#/texts/76"}, {"cref": "#/texts/77"}, {"cref": "#/texts/78"}, {"cref": "#/texts/79"}, {"cref": "#/texts/80"}, {"cref": "#/texts/81"}, {"cref": "#/texts/82"}, {"cref": "#/texts/83"}, {"cref": "#/texts/84"}, {"cref": "#/texts/85"}, {"cref": "#/texts/86"}, {"cref": "#/texts/87"}, {"cref": "#/texts/88"}, {"cref": "#/texts/89"}, {"cref": "#/texts/90"}, {"cref": "#/texts/91"}, {"cref": "#/texts/92"}, {"cref": "#/texts/93"}, {"cref": "#/texts/94"}, {"cref": "#/texts/95"}, {"cref": "#/texts/96"}, {"cref": "#/texts/97"}, {"cref": "#/texts/98"}, {"cref": "#/texts/99"}, {"cref": "#/texts/100"}, {"cref": "#/texts/101"}, {"cref": "#/texts/102"}, {"cref": "#/texts/103"}, {"cref": "#/texts/104"}, {"cref": "#/texts/105"}, {"cref": "#/texts/106"}, {"cref": "#/texts/107"}, {"cref": "#/texts/108"}, {"cref": "#/texts/109"}, {"cref": "#/texts/110"}, {"cref": "#/texts/111"}, {"cref": "#/texts/112"}, {"cref": "#/texts/113"}, {"cref": "#/texts/114"}, {"cref": "#/texts/115"}, {"cref": "#/texts/116"}, {"cref": "#/texts/117"}, {"cref": "#/texts/118"}, {"cref": "#/texts/119"}, {"cref": "#/texts/120"}, {"cref": "#/texts/121"}, {"cref": "#/texts/122"}, {"cref": "#/texts/123"}, {"cref": "#/texts/124"}, {"cref": "#/texts/125"}, {"cref": "#/texts/126"}, {"cref": "#/texts/127"}, {"cref": "#/texts/128"}, {"cref": "#/texts/129"}, {"cref": "#/texts/130"}, {"cref": "#/texts/131"}, {"cref": "#/texts/132"}, {"cref": "#/texts/133"}, {"cref": "#/texts/134"}, {"cref": "#/texts/135"}, {"cref": "#/texts/136"}, {"cref": "#/texts/137"}, {"cref": "#/texts/138"}, {"cref": "#/texts/139"}, {"cref": "#/texts/140"}, {"cref": "#/texts/141"}, {"cref": "#/texts/142"}, {"cref": "#/texts/143"}, {"cref": "#/texts/144"}, {"cref": "#/texts/145"}, {"cref": "#/texts/146"}, {"cref": "#/texts/147"}, {"cref": "#/texts/148"}, {"cref": "#/texts/149"}, {"cref": "#/texts/150"}, {"cref": "#/texts/151"}, {"cref": "#/texts/152"}, {"cref": "#/texts/153"}, {"cref": "#/texts/154"}, {"cref": "#/texts/155"}, {"cref": "#/texts/156"}, {"cref": "#/texts/157"}, {"cref": "#/texts/158"}, {"cref": "#/texts/159"}, {"cref": "#/texts/160"}, {"cref": "#/texts/161"}, {"cref": "#/texts/162"}, {"cref": "#/texts/163"}, {"cref": "#/texts/164"}, {"cref": "#/texts/165"}, {"cref": "#/texts/166"}, {"cref": "#/texts/167"}, {"cref": "#/texts/168"}, {"cref": "#/texts/169"}, {"cref": "#/texts/170"}, {"cref": "#/texts/171"}, {"cref": "#/texts/172"}, {"cref": "#/texts/173"}, {"cref": "#/texts/174"}, {"cref": "#/texts/175"}, {"cref": "#/texts/176"}, {"cref": "#/texts/177"}, {"cref": "#/texts/178"}, {"cref": "#/texts/179"}, {"cref": "#/texts/180"}, {"cref": "#/texts/181"}, {"cref": "#/texts/182"}, {"cref": "#/texts/183"}, {"cref": "#/texts/184"}, {"cref": "#/texts/185"}, {"cref": "#/texts/186"}, {"cref": "#/texts/187"}, {"cref": "#/texts/188"}, {"cref": "#/texts/189"}, {"cref": "#/texts/190"}, {"cref": "#/texts/191"}, {"cref": "#/texts/192"}, {"cref": "#/texts/193"}, {"cref": "#/texts/194"}, {"cref": "#/texts/195"}, {"cref": "#/texts/196"}, {"cref": "#/texts/197"}, {"cref": "#/texts/198"}, {"cref": "#/texts/199"}, {"cref": "#/texts/200"}, {"cref": "#/texts/201"}, {"cref": "#/texts/202"}, {"cref": "#/texts/203"}, {"cref": "#/texts/204"}, {"cref": "#/texts/205"}, {"cref": "#/texts/206"}, {"cref": "#/texts/207"}, {"cref": "#/texts/208"}, {"cref": "#/texts/209"}, {"cref": "#/texts/210"}, {"cref": "#/texts/211"}, {"cref": "#/texts/212"}, {"cref": "#/texts/213"}, {"cref": "#/texts/214"}, {"cref": "#/texts/215"}, {"cref": "#/texts/216"}, {"cref": "#/texts/217"}, {"cref": "#/texts/218"}, {"cref": "#/texts/219"}, {"cref": "#/texts/220"}, {"cref": "#/texts/221"}, {"cref": "#/texts/222"}, {"cref": "#/texts/223"}, {"cref": "#/texts/224"}, {"cref": "#/texts/225"}, {"cref": "#/texts/226"}, {"cref": "#/texts/227"}, {"cref": "#/texts/228"}, {"cref": "#/texts/229"}, {"cref": "#/texts/230"}, {"cref": "#/texts/231"}, {"cref": "#/texts/232"}, {"cref": "#/texts/233"}, {"cref": "#/texts/234"}, {"cref": "#/texts/235"}, {"cref": "#/texts/236"}, {"cref": "#/texts/237"}, {"cref": "#/texts/238"}, {"cref": "#/texts/239"}, {"cref": "#/texts/240"}, {"cref": "#/texts/241"}, {"cref": "#/texts/242"}, {"cref": "#/texts/243"}, {"cref": "#/texts/244"}, {"cref": "#/texts/245"}, {"cref": "#/texts/246"}, {"cref": "#/texts/247"}, {"cref": "#/texts/248"}, {"cref": "#/texts/249"}, {"cref": "#/texts/250"}, {"cref": "#/texts/251"}, {"cref": "#/texts/252"}, {"cref": "#/texts/253"}, {"cref": "#/texts/254"}, {"cref": "#/texts/255"}, {"cref": "#/texts/256"}, {"cref": "#/texts/257"}, {"cref": "#/texts/258"}, {"cref": "#/texts/259"}, {"cref": "#/texts/260"}, {"cref": "#/texts/261"}, {"cref": "#/texts/262"}, {"cref": "#/texts/263"}, {"cref": "#/texts/264"}, {"cref": "#/texts/265"}, {"cref": "#/texts/266"}, {"cref": "#/texts/267"}, {"cref": "#/texts/268"}, {"cref": "#/texts/269"}, {"cref": "#/texts/270"}, {"cref": "#/texts/271"}, {"cref": "#/texts/272"}, {"cref": "#/texts/273"}, {"cref": "#/texts/274"}, {"cref": "#/texts/275"}, {"cref": "#/texts/276"}, {"cref": "#/texts/277"}, {"cref": "#/texts/278"}, {"cref": "#/texts/279"}, {"cref": "#/texts/280"}, {"cref": "#/texts/281"}, {"cref": "#/texts/282"}, {"cref": "#/texts/283"}, {"cref": "#/texts/284"}, {"cref": "#/texts/285"}, {"cref": "#/texts/286"}, {"cref": "#/texts/287"}, {"cref": "#/texts/288"}, {"cref": "#/texts/289"}, {"cref": "#/texts/290"}, {"cref": "#/texts/291"}, {"cref": "#/texts/292"}, {"cref": "#/texts/293"}, {"cref": "#/texts/294"}, {"cref": "#/texts/295"}, {"cref": "#/texts/296"}, {"cref": "#/texts/297"}, {"cref": "#/texts/298"}, {"cref": "#/texts/299"}, {"cref": "#/texts/300"}, {"cref": "#/texts/301"}, {"cref": "#/texts/302"}, {"cref": "#/texts/303"}, {"cref": "#/texts/304"}, {"cref": "#/texts/305"}, {"cref": "#/texts/306"}, {"cref": "#/texts/307"}, {"cref": "#/texts/308"}, {"cref": "#/texts/309"}, {"cref": "#/texts/310"}, {"cref": "#/texts/311"}, {"cref": "#/texts/312"}, {"cref": "#/texts/313"}, {"cref": "#/texts/314"}, {"cref": "#/texts/315"}, {"cref": "#/texts/316"}, {"cref": "#/texts/317"}, {"cref": "#/texts/318"}, {"cref": "#/texts/319"}, {"cref": "#/texts/320"}, {"cref": "#/texts/321"}, {"cref": "#/texts/322"}, {"cref": "#/texts/323"}, {"cref": "#/texts/324"}, {"cref": "#/texts/325"}, {"cref": "#/texts/326"}, {"cref": "#/texts/327"}, {"cref": "#/texts/328"}, {"cref": "#/texts/329"}, {"cref": "#/texts/330"}, {"cref": "#/texts/331"}, {"cref": "#/texts/332"}, {"cref": "#/texts/333"}, {"cref": "#/texts/334"}, {"cref": "#/texts/335"}, {"cref": "#/texts/336"}, {"cref": "#/texts/337"}, {"cref": "#/texts/338"}, {"cref": "#/texts/339"}, {"cref": "#/texts/340"}, {"cref": "#/texts/341"}, {"cref": "#/texts/342"}, {"cref": "#/texts/343"}, {"cref": "#/texts/344"}, {"cref": "#/texts/345"}], "label": "picture", "prov": [{"page_no": 1, "bbox": {"l": 323.408203125, "t": 541.6512451171875, "r": 553.2952270507812, "b": 266.1492919921875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 84]}], "captions": [{"cref": "#/texts/16"}], "references": [], "footnotes": [], "image": null, "annotations": []}, {"self_ref": "#/pictures/1", "parent": {"cref": "#/body"}, "children": [{"cref": "#/texts/373"}, {"cref": "#/texts/374"}, {"cref": "#/texts/375"}, {"cref": "#/texts/376"}, {"cref": "#/texts/377"}, {"cref": "#/texts/378"}, {"cref": "#/texts/379"}, {"cref": "#/texts/380"}, {"cref": "#/texts/381"}, {"cref": "#/texts/382"}, {"cref": "#/texts/383"}, {"cref": "#/texts/384"}], "label": "picture", "prov": [{"page_no": 3, "bbox": {"l": 88.33030700683594, "t": 699.1134643554688, "r": 263.7049560546875, "b": 571.4317626953125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 69]}], "captions": [{"cref": "#/texts/372"}], "references": [], "footnotes": [], "image": null, "annotations": []}, {"self_ref": "#/pictures/2", "parent": {"cref": "#/body"}, "children": [], "label": "picture", "prov": [{"page_no": 4, "bbox": {"l": 53.05912780761719, "t": 481.2087097167969, "r": 295.8506164550781, "b": 251.135986328125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 281]}], "captions": [{"cref": "#/texts/397"}], "references": [], "footnotes": [], "image": null, "annotations": []}, {"self_ref": "#/pictures/3", "parent": {"cref": "#/body"}, "children": [{"cref": "#/texts/418"}, {"cref": "#/texts/419"}, {"cref": "#/texts/420"}, {"cref": "#/texts/421"}, {"cref": "#/texts/422"}, {"cref": "#/texts/423"}, {"cref": "#/texts/424"}, {"cref": "#/texts/425"}, {"cref": "#/texts/426"}, {"cref": "#/texts/427"}], "label": "picture", "prov": [{"page_no": 5, "bbox": {"l": 315.960205078125, "t": 706.6611938476562, "r": 559.396484375, "b": 332.31915283203125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 173]}], "captions": [{"cref": "#/texts/417"}], "references": [], "footnotes": [], "image": null, "annotations": []}, {"self_ref": "#/pictures/4", "parent": {"cref": "#/body"}, "children": [{"cref": "#/texts/437"}, {"cref": "#/texts/438"}, {"cref": "#/texts/439"}, {"cref": "#/texts/440"}, {"cref": "#/texts/441"}, {"cref": "#/texts/442"}, {"cref": "#/texts/443"}, {"cref": "#/texts/444"}, {"cref": "#/texts/445"}, {"cref": "#/texts/446"}, {"cref": "#/texts/447"}, {"cref": "#/texts/448"}, {"cref": "#/texts/449"}, {"cref": "#/texts/450"}, {"cref": "#/texts/451"}, {"cref": "#/texts/452"}, {"cref": "#/texts/453"}, {"cref": "#/texts/454"}, {"cref": "#/texts/455"}, {"cref": "#/texts/456"}, {"cref": "#/texts/457"}, {"cref": "#/texts/458"}], "label": "picture", "prov": [{"page_no": 6, "bbox": {"l": 323.48431396484375, "t": 702.1139526367188, "r": 553.5411376953125, "b": 531.9892578125, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 329]}], "captions": [{"cref": "#/texts/436"}], "references": [], "footnotes": [], "image": null, "annotations": []}, {"self_ref": "#/pictures/5", "parent": {"cref": "#/body"}, "children": [{"cref": "#/texts/503"}, {"cref": "#/texts/504"}, {"cref": "#/texts/505"}, {"cref": "#/texts/506"}, {"cref": "#/texts/507"}, {"cref": "#/texts/508"}, {"cref": "#/texts/509"}, {"cref": "#/texts/510"}, {"cref": "#/texts/511"}, {"cref": "#/texts/512"}, {"cref": "#/texts/513"}], "label": "picture", "prov": [{"page_no": 9, "bbox": {"l": 52.963985443115234, "t": 707.2640991210938, "r": 556.931640625, "b": 349.8648681640625, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 89]}], "captions": [{"cref": "#/texts/502"}], "references": [], "footnotes": [], "image": null, "annotations": []}], "tables": [{"self_ref": "#/tables/0", "parent": {"cref": "#/body"}, "children": [], "label": "table", "prov": [{"page_no": 4, "bbox": {"l": 98.93103790283203, "t": 654.5245361328125, "r": 512.579833984375, "b": 497.91851806640625, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 0]}], "captions": [{"cref": "#/texts/396"}], "references": [], "footnotes": [], "image": null, "data": {"table_cells": [{"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 233.94400024414062, "t": 651.7764892578125, "r": 270.042724609375, "b": 643.40185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 4, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 2, "end_col_offset_idx": 6, "text": "% of Total", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 329.04998779296875, "t": 651.7764892578125, "r": 483.39764404296875, "b": 643.40185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 6, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 6, "end_col_offset_idx": 12, "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 104.82499694824219, "t": 640.8174438476562, "r": 141.7127685546875, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "class label", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 175.94700622558594, "t": 640.8174438476562, "r": 198.7126922607422, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Count", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 213.7949981689453, "t": 640.8174438476562, "r": 233.69143676757812, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "Train", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 249.37367248535156, "t": 640.8174438476562, "r": 264.5, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "Test", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 283.5356750488281, "t": 640.8174438476562, "r": 295.3085632324219, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "Val", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 314.0150146484375, "t": 640.8174438476562, "r": 324.9809265136719, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "All", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 343.0123596191406, "t": 640.8174438476562, "r": 354.6507568359375, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "Fin", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 367.84033203125, "t": 640.8174438476562, "r": 384.3205871582031, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "Man", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 407.5435791015625, "t": 640.8174438476562, "r": 418.1597900390625, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "Sci", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 432.2998046875, "t": 640.8174438476562, "r": 447.8296203613281, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "Law", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 465.7265625, "t": 640.8174438476562, "r": 477.5084228515625, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "Pat", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 493.52239990234375, "t": 640.8174438476562, "r": 507.17822265625, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "Ten", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 104.82499694824219, "t": 629.46044921875, "r": 134.01063537597656, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Caption", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 177.86599731445312, "t": 629.46044921875, "r": 198.71287536621094, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "22524", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 219.21099853515625, "t": 629.46044921875, "r": 233.69174194335938, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "2.04", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 250.01956176757812, "t": 629.46044921875, "r": 264.50030517578125, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "1.77", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 280.828125, "t": 629.46044921875, "r": 295.3088684082031, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "2.32", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 629.46044921875, "r": 324.9811706542969, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "84-89", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 629.46044921875, "r": 354.6510009765625, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "40-61", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 629.46044921875, "r": 384.3208312988281, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "86-92", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 398.4518737792969, "t": 629.46044921875, "r": 418.1600341796875, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "94-99", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1217041015625, "t": 629.46044921875, "r": 447.8298645019531, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "95-99", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 457.8005065917969, "t": 629.46044921875, "r": 477.5086669921875, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "69-78", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 495.32489013671875, "t": 629.46044921875, "r": 507.178466796875, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "n/a", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 104.82499694824219, "t": 618.50146484375, "r": 137.3282012939453, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Footnote", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 182.03500366210938, "t": 618.50146484375, "r": 198.71250915527344, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "6318", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 219.21099853515625, "t": 618.50146484375, "r": 233.69174194335938, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "0.60", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 250.01956176757812, "t": 618.50146484375, "r": 264.50030517578125, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "0.31", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 280.828125, "t": 618.50146484375, "r": 295.3088684082031, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "0.58", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 618.50146484375, "r": 324.9811706542969, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "83-91", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 342.7973937988281, "t": 618.50146484375, "r": 354.6509704589844, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "n/a", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 371.8126525878906, "t": 618.50146484375, "r": 384.3207702636719, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "100", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 398.4518127441406, "t": 618.50146484375, "r": 418.15997314453125, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "62-88", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.12164306640625, "t": 618.50146484375, "r": 447.8298034667969, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "85-94", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 465.6549987792969, "t": 618.50146484375, "r": 477.5085754394531, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "n/a", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.4702453613281, "t": 618.50146484375, "r": 507.17840576171875, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "82-97", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 104.82499694824219, "t": 607.54248046875, "r": 135.33766174316406, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Formula", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 177.86599731445312, "t": 607.54248046875, "r": 198.71287536621094, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "25027", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 219.21099853515625, "t": 607.54248046875, "r": 233.69174194335938, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "2.25", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 250.01956176757812, "t": 607.54248046875, "r": 264.50030517578125, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "1.90", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 280.828125, "t": 607.54248046875, "r": 295.3088684082031, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "2.96", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 607.54248046875, "r": 324.9811706542969, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "83-85", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 342.7973937988281, "t": 607.54248046875, "r": 354.6509704589844, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "n/a", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 372.4671936035156, "t": 607.54248046875, "r": 384.3207702636719, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "n/a", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 398.4518127441406, "t": 607.54248046875, "r": 418.15997314453125, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "84-87", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.12164306640625, "t": 607.54248046875, "r": 447.8298034667969, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "86-96", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 465.6549987792969, "t": 607.54248046875, "r": 477.5085754394531, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "n/a", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 495.3247985839844, "t": 607.54248046875, "r": 507.1783752441406, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "n/a", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 104.82499694824219, "t": 596.5834350585938, "r": 137.7047882080078, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "List-item", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 173.69700622558594, "t": 596.5834350585938, "r": 198.7132568359375, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "185660", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 215.04200744628906, "t": 596.5834350585938, "r": 233.69212341308594, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "17.19", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 245.85055541992188, "t": 596.5834350585938, "r": 264.50067138671875, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "13.34", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 276.65911865234375, "t": 596.5834350585938, "r": 295.3092346191406, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "15.82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 596.5834350585938, "r": 324.9811706542969, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "87-88", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 596.5834350585938, "r": 354.6510009765625, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "74-83", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 596.5834350585938, "r": 384.3208312988281, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "90-92", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 398.4518737792969, "t": 596.5834350585938, "r": 418.1600341796875, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "97-97", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1217041015625, "t": 596.5834350585938, "r": 447.8298645019531, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "81-85", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 457.8005065917969, "t": 596.5834350585938, "r": 477.5086669921875, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "75-88", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.4703369140625, "t": 596.5834350585938, "r": 507.1784973144531, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "93-95", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 104.82499694824219, "t": 585.6244506835938, "r": 147.3526153564453, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Page-footer", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 177.86599731445312, "t": 585.6244506835938, "r": 198.71287536621094, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "70878", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 219.21099853515625, "t": 585.6244506835938, "r": 233.69174194335938, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "6.51", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 250.01956176757812, "t": 585.6244506835938, "r": 264.50030517578125, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "5.58", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 280.828125, "t": 585.6244506835938, "r": 295.3088684082031, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "6.00", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 585.6244506835938, "r": 324.9811706542969, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "93-94", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 585.6244506835938, "r": 354.6510009765625, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "88-90", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 585.6244506835938, "r": 384.3208312988281, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "95-96", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 405.6518859863281, "t": 585.6244506835938, "r": 418.1600036621094, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "100", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1216735839844, "t": 585.6244506835938, "r": 447.829833984375, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "92-97", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 465.00048828125, "t": 585.6244506835938, "r": 477.50860595703125, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "100", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.47027587890625, "t": 585.6244506835938, "r": 507.1784362792969, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "96-98", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 104.82499694824219, "t": 574.6654663085938, "r": 150.10531616210938, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Page-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 177.86599731445312, "t": 574.6654663085938, "r": 198.71287536621094, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "58022", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 219.21099853515625, "t": 574.6654663085938, "r": 233.69174194335938, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "5.10", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 250.01956176757812, "t": 574.6654663085938, "r": 264.50030517578125, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "6.70", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 280.828125, "t": 574.6654663085938, "r": 295.3088684082031, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "5.06", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 574.6654663085938, "r": 324.9811706542969, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "85-89", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 574.6654663085938, "r": 354.6510009765625, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "66-76", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 574.6654663085938, "r": 384.3208312988281, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "90-94", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 394.2825012207031, "t": 574.6654663085938, "r": 418.1600341796875, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "98-100", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1217041015625, "t": 574.6654663085938, "r": 447.8298645019531, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "91-92", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 457.8005065917969, "t": 574.6654663085938, "r": 477.5086669921875, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "97-99", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.4703369140625, "t": 574.6654663085938, "r": 507.1784973144531, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "81-86", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 104.82499694824219, "t": 563.7064819335938, "r": 130.80963134765625, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Picture", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 177.86599731445312, "t": 563.7064819335938, "r": 198.71287536621094, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "45976", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 219.21099853515625, "t": 563.7064819335938, "r": 233.69174194335938, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "4.21", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 250.01956176757812, "t": 563.7064819335938, "r": 264.50030517578125, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "2.78", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 280.828125, "t": 563.7064819335938, "r": 295.3088684082031, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "5.31", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 563.7064819335938, "r": 324.9811706542969, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "69-71", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 563.7064819335938, "r": 354.6510009765625, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "56-59", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 563.7064819335938, "r": 384.3208312988281, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "82-86", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 398.4518737792969, "t": 563.7064819335938, "r": 418.1600341796875, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "69-82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1217041015625, "t": 563.7064819335938, "r": 447.8298645019531, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "80-95", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 457.8005065917969, "t": 563.7064819335938, "r": 477.5086669921875, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "66-71", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.4703369140625, "t": 563.7064819335938, "r": 507.1784973144531, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "59-76", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 104.82499694824219, "t": 552.7474365234375, "r": 159.5648651123047, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Section-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 173.69700622558594, "t": 552.7474365234375, "r": 198.7132568359375, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "142884", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 215.04200744628906, "t": 552.7474365234375, "r": 233.69212341308594, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "12.60", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 245.85055541992188, "t": 552.7474365234375, "r": 264.50067138671875, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "15.77", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 276.65911865234375, "t": 552.7474365234375, "r": 295.3092346191406, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "12.85", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 552.7474365234375, "r": 324.9811706542969, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "83-84", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 552.7474365234375, "r": 354.6510009765625, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "76-81", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 552.7474365234375, "r": 384.3208312988281, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "90-92", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 398.4518737792969, "t": 552.7474365234375, "r": 418.1600341796875, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "94-95", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1217041015625, "t": 552.7474365234375, "r": 447.8298645019531, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "87-94", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 457.8005065917969, "t": 552.7474365234375, "r": 477.5086669921875, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "69-73", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.4703369140625, "t": 552.7474365234375, "r": 507.1784973144531, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "78-86", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 104.82499694824219, "t": 541.7884521484375, "r": 124.63176727294922, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Table", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 177.86599731445312, "t": 541.7884521484375, "r": 198.71287536621094, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "34733", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 219.21099853515625, "t": 541.7884521484375, "r": 233.69174194335938, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "3.20", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 250.01956176757812, "t": 541.7884521484375, "r": 264.50030517578125, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "2.27", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 280.828125, "t": 541.7884521484375, "r": 295.3088684082031, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "3.60", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 541.7884521484375, "r": 324.9811706542969, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "77-81", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 541.7884521484375, "r": 354.6510009765625, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "75-80", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 541.7884521484375, "r": 384.3208312988281, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "83-86", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 398.4518737792969, "t": 541.7884521484375, "r": 418.1600341796875, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "98-99", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1217041015625, "t": 541.7884521484375, "r": 447.8298645019531, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "58-80", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 457.8005065917969, "t": 541.7884521484375, "r": 477.5086669921875, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "79-84", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.4703369140625, "t": 541.7884521484375, "r": 507.1784973144531, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "70-85", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 104.82499694824219, "t": 530.8304443359375, "r": 120.78518676757812, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Text", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 173.69700622558594, "t": 530.8304443359375, "r": 198.7132568359375, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "510377", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 215.04200744628906, "t": 530.8304443359375, "r": 233.69212341308594, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "45.82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 245.85055541992188, "t": 530.8304443359375, "r": 264.50067138671875, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "49.28", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 276.65911865234375, "t": 530.8304443359375, "r": 295.3092346191406, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "45.00", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 530.8304443359375, "r": 324.9811706542969, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "84-86", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 530.8304443359375, "r": 354.6510009765625, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "81-86", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 530.8304443359375, "r": 384.3208312988281, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "88-93", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 398.4518737792969, "t": 530.8304443359375, "r": 418.1600341796875, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "89-93", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1217041015625, "t": 530.8304443359375, "r": 447.8298645019531, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "87-92", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 457.8005065917969, "t": 530.8304443359375, "r": 477.5086669921875, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "71-79", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.4703369140625, "t": 530.8304443359375, "r": 507.1784973144531, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "87-95", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 104.82499694824219, "t": 519.8714599609375, "r": 121.81632995605469, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Title", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 182.03500366210938, "t": 519.8714599609375, "r": 198.71250915527344, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "5071", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 219.21099853515625, "t": 519.8714599609375, "r": 233.69174194335938, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "0.47", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 250.01956176757812, "t": 519.8714599609375, "r": 264.50030517578125, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "0.30", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 280.828125, "t": 519.8714599609375, "r": 295.3088684082031, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "0.50", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 519.8714599609375, "r": 324.9811706542969, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "60-72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 519.8714599609375, "r": 354.6510009765625, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "24-63", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 519.8714599609375, "r": 384.3208312988281, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "50-63", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 394.2825012207031, "t": 519.8714599609375, "r": 418.1600341796875, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "94-100", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1217041015625, "t": 519.8714599609375, "r": 447.8298645019531, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "82-96", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 457.8005065917969, "t": 519.8714599609375, "r": 477.5086669921875, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "68-79", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.4703369140625, "t": 519.8714599609375, "r": 507.1784973144531, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "24-56", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 104.82499694824219, "t": 508.5134582519531, "r": 123.43028259277344, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Total", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 169.52699279785156, "t": 508.5134582519531, "r": 198.71263122558594, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "1107470", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 208.6750030517578, "t": 508.5134582519531, "r": 233.69125366210938, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "941123", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 243.65292358398438, "t": 508.5134582519531, "r": 264.49981689453125, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "99816", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 274.46148681640625, "t": 508.5134582519531, "r": 295.3083801269531, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "66531", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 508.5134582519531, "r": 324.9811706542969, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "82-83", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 508.5134582519531, "r": 354.6510009765625, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "71-74", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 508.5134582519531, "r": 384.3208312988281, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "79-81", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 398.4518737792969, "t": 508.5134582519531, "r": 418.1600341796875, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "89-94", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1217041015625, "t": 508.5134582519531, "r": 447.8298645019531, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "86-91", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 457.8005065917969, "t": 508.5134582519531, "r": 477.5086669921875, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "71-76", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.4703369140625, "t": 508.5134582519531, "r": 507.1784973144531, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "68-85", "column_header": false, "row_header": false, "row_section": false}], "num_rows": 14, "num_cols": 12, "grid": [[{"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 233.94400024414062, "t": 651.7764892578125, "r": 270.042724609375, "b": 643.40185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 4, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 2, "end_col_offset_idx": 6, "text": "% of Total", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 233.94400024414062, "t": 651.7764892578125, "r": 270.042724609375, "b": 643.40185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 4, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 2, "end_col_offset_idx": 6, "text": "% of Total", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 233.94400024414062, "t": 651.7764892578125, "r": 270.042724609375, "b": 643.40185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 4, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 2, "end_col_offset_idx": 6, "text": "% of Total", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 233.94400024414062, "t": 651.7764892578125, "r": 270.042724609375, "b": 643.40185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 4, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 2, "end_col_offset_idx": 6, "text": "% of Total", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 329.04998779296875, "t": 651.7764892578125, "r": 483.39764404296875, "b": 643.40185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 6, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 6, "end_col_offset_idx": 12, "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 329.04998779296875, "t": 651.7764892578125, "r": 483.39764404296875, "b": 643.40185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 6, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 6, "end_col_offset_idx": 12, "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 329.04998779296875, "t": 651.7764892578125, "r": 483.39764404296875, "b": 643.40185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 6, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 6, "end_col_offset_idx": 12, "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 329.04998779296875, "t": 651.7764892578125, "r": 483.39764404296875, "b": 643.40185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 6, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 6, "end_col_offset_idx": 12, "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 329.04998779296875, "t": 651.7764892578125, "r": 483.39764404296875, "b": 643.40185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 6, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 6, "end_col_offset_idx": 12, "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 329.04998779296875, "t": 651.7764892578125, "r": 483.39764404296875, "b": 643.40185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 6, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 6, "end_col_offset_idx": 12, "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", "column_header": true, "row_header": false, "row_section": false}], [{"bbox": {"l": 104.82499694824219, "t": 640.8174438476562, "r": 141.7127685546875, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "class label", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 175.94700622558594, "t": 640.8174438476562, "r": 198.7126922607422, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Count", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 213.7949981689453, "t": 640.8174438476562, "r": 233.69143676757812, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "Train", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 249.37367248535156, "t": 640.8174438476562, "r": 264.5, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "Test", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 283.5356750488281, "t": 640.8174438476562, "r": 295.3085632324219, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "Val", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 314.0150146484375, "t": 640.8174438476562, "r": 324.9809265136719, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "All", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 343.0123596191406, "t": 640.8174438476562, "r": 354.6507568359375, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "Fin", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 367.84033203125, "t": 640.8174438476562, "r": 384.3205871582031, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "Man", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 407.5435791015625, "t": 640.8174438476562, "r": 418.1597900390625, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "Sci", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 432.2998046875, "t": 640.8174438476562, "r": 447.8296203613281, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "Law", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 465.7265625, "t": 640.8174438476562, "r": 477.5084228515625, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "Pat", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 493.52239990234375, "t": 640.8174438476562, "r": 507.17822265625, "b": 632.4428100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "Ten", "column_header": true, "row_header": false, "row_section": false}], [{"bbox": {"l": 104.82499694824219, "t": 629.46044921875, "r": 134.01063537597656, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Caption", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 177.86599731445312, "t": 629.46044921875, "r": 198.71287536621094, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "22524", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 219.21099853515625, "t": 629.46044921875, "r": 233.69174194335938, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "2.04", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 250.01956176757812, "t": 629.46044921875, "r": 264.50030517578125, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "1.77", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 280.828125, "t": 629.46044921875, "r": 295.3088684082031, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "2.32", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 629.46044921875, "r": 324.9811706542969, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "84-89", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 629.46044921875, "r": 354.6510009765625, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "40-61", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 629.46044921875, "r": 384.3208312988281, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "86-92", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 398.4518737792969, "t": 629.46044921875, "r": 418.1600341796875, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "94-99", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1217041015625, "t": 629.46044921875, "r": 447.8298645019531, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "95-99", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 457.8005065917969, "t": 629.46044921875, "r": 477.5086669921875, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "69-78", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 495.32489013671875, "t": 629.46044921875, "r": 507.178466796875, "b": 621.0858154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "n/a", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 104.82499694824219, "t": 618.50146484375, "r": 137.3282012939453, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Footnote", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 182.03500366210938, "t": 618.50146484375, "r": 198.71250915527344, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "6318", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 219.21099853515625, "t": 618.50146484375, "r": 233.69174194335938, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "0.60", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 250.01956176757812, "t": 618.50146484375, "r": 264.50030517578125, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "0.31", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 280.828125, "t": 618.50146484375, "r": 295.3088684082031, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "0.58", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 618.50146484375, "r": 324.9811706542969, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "83-91", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 342.7973937988281, "t": 618.50146484375, "r": 354.6509704589844, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "n/a", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 371.8126525878906, "t": 618.50146484375, "r": 384.3207702636719, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "100", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 398.4518127441406, "t": 618.50146484375, "r": 418.15997314453125, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "62-88", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.12164306640625, "t": 618.50146484375, "r": 447.8298034667969, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "85-94", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 465.6549987792969, "t": 618.50146484375, "r": 477.5085754394531, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "n/a", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.4702453613281, "t": 618.50146484375, "r": 507.17840576171875, "b": 610.1268310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "82-97", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 104.82499694824219, "t": 607.54248046875, "r": 135.33766174316406, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Formula", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 177.86599731445312, "t": 607.54248046875, "r": 198.71287536621094, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "25027", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 219.21099853515625, "t": 607.54248046875, "r": 233.69174194335938, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "2.25", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 250.01956176757812, "t": 607.54248046875, "r": 264.50030517578125, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "1.90", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 280.828125, "t": 607.54248046875, "r": 295.3088684082031, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "2.96", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 607.54248046875, "r": 324.9811706542969, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "83-85", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 342.7973937988281, "t": 607.54248046875, "r": 354.6509704589844, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "n/a", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 372.4671936035156, "t": 607.54248046875, "r": 384.3207702636719, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "n/a", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 398.4518127441406, "t": 607.54248046875, "r": 418.15997314453125, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "84-87", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.12164306640625, "t": 607.54248046875, "r": 447.8298034667969, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "86-96", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 465.6549987792969, "t": 607.54248046875, "r": 477.5085754394531, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "n/a", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 495.3247985839844, "t": 607.54248046875, "r": 507.1783752441406, "b": 599.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "n/a", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 104.82499694824219, "t": 596.5834350585938, "r": 137.7047882080078, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "List-item", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 173.69700622558594, "t": 596.5834350585938, "r": 198.7132568359375, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "185660", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 215.04200744628906, "t": 596.5834350585938, "r": 233.69212341308594, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "17.19", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 245.85055541992188, "t": 596.5834350585938, "r": 264.50067138671875, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "13.34", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 276.65911865234375, "t": 596.5834350585938, "r": 295.3092346191406, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "15.82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 596.5834350585938, "r": 324.9811706542969, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "87-88", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 596.5834350585938, "r": 354.6510009765625, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "74-83", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 596.5834350585938, "r": 384.3208312988281, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "90-92", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 398.4518737792969, "t": 596.5834350585938, "r": 418.1600341796875, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "97-97", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1217041015625, "t": 596.5834350585938, "r": 447.8298645019531, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "81-85", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 457.8005065917969, "t": 596.5834350585938, "r": 477.5086669921875, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "75-88", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.4703369140625, "t": 596.5834350585938, "r": 507.1784973144531, "b": 588.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "93-95", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 104.82499694824219, "t": 585.6244506835938, "r": 147.3526153564453, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Page-footer", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 177.86599731445312, "t": 585.6244506835938, "r": 198.71287536621094, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "70878", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 219.21099853515625, "t": 585.6244506835938, "r": 233.69174194335938, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "6.51", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 250.01956176757812, "t": 585.6244506835938, "r": 264.50030517578125, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "5.58", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 280.828125, "t": 585.6244506835938, "r": 295.3088684082031, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "6.00", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 585.6244506835938, "r": 324.9811706542969, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "93-94", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 585.6244506835938, "r": 354.6510009765625, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "88-90", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 585.6244506835938, "r": 384.3208312988281, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "95-96", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 405.6518859863281, "t": 585.6244506835938, "r": 418.1600036621094, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "100", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1216735839844, "t": 585.6244506835938, "r": 447.829833984375, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "92-97", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 465.00048828125, "t": 585.6244506835938, "r": 477.50860595703125, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "100", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.47027587890625, "t": 585.6244506835938, "r": 507.1784362792969, "b": 577.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "96-98", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 104.82499694824219, "t": 574.6654663085938, "r": 150.10531616210938, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Page-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 177.86599731445312, "t": 574.6654663085938, "r": 198.71287536621094, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "58022", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 219.21099853515625, "t": 574.6654663085938, "r": 233.69174194335938, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "5.10", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 250.01956176757812, "t": 574.6654663085938, "r": 264.50030517578125, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "6.70", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 280.828125, "t": 574.6654663085938, "r": 295.3088684082031, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "5.06", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 574.6654663085938, "r": 324.9811706542969, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "85-89", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 574.6654663085938, "r": 354.6510009765625, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "66-76", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 574.6654663085938, "r": 384.3208312988281, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "90-94", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 394.2825012207031, "t": 574.6654663085938, "r": 418.1600341796875, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "98-100", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1217041015625, "t": 574.6654663085938, "r": 447.8298645019531, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "91-92", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 457.8005065917969, "t": 574.6654663085938, "r": 477.5086669921875, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "97-99", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.4703369140625, "t": 574.6654663085938, "r": 507.1784973144531, "b": 566.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "81-86", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 104.82499694824219, "t": 563.7064819335938, "r": 130.80963134765625, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Picture", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 177.86599731445312, "t": 563.7064819335938, "r": 198.71287536621094, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "45976", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 219.21099853515625, "t": 563.7064819335938, "r": 233.69174194335938, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "4.21", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 250.01956176757812, "t": 563.7064819335938, "r": 264.50030517578125, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "2.78", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 280.828125, "t": 563.7064819335938, "r": 295.3088684082031, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "5.31", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 563.7064819335938, "r": 324.9811706542969, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "69-71", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 563.7064819335938, "r": 354.6510009765625, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "56-59", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 563.7064819335938, "r": 384.3208312988281, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "82-86", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 398.4518737792969, "t": 563.7064819335938, "r": 418.1600341796875, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "69-82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1217041015625, "t": 563.7064819335938, "r": 447.8298645019531, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "80-95", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 457.8005065917969, "t": 563.7064819335938, "r": 477.5086669921875, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "66-71", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.4703369140625, "t": 563.7064819335938, "r": 507.1784973144531, "b": 555.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "59-76", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 104.82499694824219, "t": 552.7474365234375, "r": 159.5648651123047, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Section-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 173.69700622558594, "t": 552.7474365234375, "r": 198.7132568359375, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "142884", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 215.04200744628906, "t": 552.7474365234375, "r": 233.69212341308594, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "12.60", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 245.85055541992188, "t": 552.7474365234375, "r": 264.50067138671875, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "15.77", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 276.65911865234375, "t": 552.7474365234375, "r": 295.3092346191406, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "12.85", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 552.7474365234375, "r": 324.9811706542969, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "83-84", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 552.7474365234375, "r": 354.6510009765625, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "76-81", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 552.7474365234375, "r": 384.3208312988281, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "90-92", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 398.4518737792969, "t": 552.7474365234375, "r": 418.1600341796875, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "94-95", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1217041015625, "t": 552.7474365234375, "r": 447.8298645019531, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "87-94", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 457.8005065917969, "t": 552.7474365234375, "r": 477.5086669921875, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "69-73", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.4703369140625, "t": 552.7474365234375, "r": 507.1784973144531, "b": 544.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "78-86", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 104.82499694824219, "t": 541.7884521484375, "r": 124.63176727294922, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Table", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 177.86599731445312, "t": 541.7884521484375, "r": 198.71287536621094, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "34733", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 219.21099853515625, "t": 541.7884521484375, "r": 233.69174194335938, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "3.20", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 250.01956176757812, "t": 541.7884521484375, "r": 264.50030517578125, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "2.27", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 280.828125, "t": 541.7884521484375, "r": 295.3088684082031, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "3.60", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 541.7884521484375, "r": 324.9811706542969, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "77-81", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 541.7884521484375, "r": 354.6510009765625, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "75-80", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 541.7884521484375, "r": 384.3208312988281, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "83-86", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 398.4518737792969, "t": 541.7884521484375, "r": 418.1600341796875, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "98-99", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1217041015625, "t": 541.7884521484375, "r": 447.8298645019531, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "58-80", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 457.8005065917969, "t": 541.7884521484375, "r": 477.5086669921875, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "79-84", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.4703369140625, "t": 541.7884521484375, "r": 507.1784973144531, "b": 533.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "70-85", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 104.82499694824219, "t": 530.8304443359375, "r": 120.78518676757812, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Text", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 173.69700622558594, "t": 530.8304443359375, "r": 198.7132568359375, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "510377", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 215.04200744628906, "t": 530.8304443359375, "r": 233.69212341308594, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "45.82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 245.85055541992188, "t": 530.8304443359375, "r": 264.50067138671875, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "49.28", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 276.65911865234375, "t": 530.8304443359375, "r": 295.3092346191406, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "45.00", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 530.8304443359375, "r": 324.9811706542969, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "84-86", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 530.8304443359375, "r": 354.6510009765625, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "81-86", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 530.8304443359375, "r": 384.3208312988281, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "88-93", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 398.4518737792969, "t": 530.8304443359375, "r": 418.1600341796875, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "89-93", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1217041015625, "t": 530.8304443359375, "r": 447.8298645019531, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "87-92", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 457.8005065917969, "t": 530.8304443359375, "r": 477.5086669921875, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "71-79", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.4703369140625, "t": 530.8304443359375, "r": 507.1784973144531, "b": 522.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "87-95", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 104.82499694824219, "t": 519.8714599609375, "r": 121.81632995605469, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Title", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 182.03500366210938, "t": 519.8714599609375, "r": 198.71250915527344, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "5071", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 219.21099853515625, "t": 519.8714599609375, "r": 233.69174194335938, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "0.47", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 250.01956176757812, "t": 519.8714599609375, "r": 264.50030517578125, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "0.30", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 280.828125, "t": 519.8714599609375, "r": 295.3088684082031, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "0.50", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 519.8714599609375, "r": 324.9811706542969, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "60-72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 519.8714599609375, "r": 354.6510009765625, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "24-63", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 519.8714599609375, "r": 384.3208312988281, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "50-63", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 394.2825012207031, "t": 519.8714599609375, "r": 418.1600341796875, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "94-100", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1217041015625, "t": 519.8714599609375, "r": 447.8298645019531, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "82-96", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 457.8005065917969, "t": 519.8714599609375, "r": 477.5086669921875, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "68-79", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.4703369140625, "t": 519.8714599609375, "r": 507.1784973144531, "b": 511.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "24-56", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 104.82499694824219, "t": 508.5134582519531, "r": 123.43028259277344, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Total", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 169.52699279785156, "t": 508.5134582519531, "r": 198.71263122558594, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "1107470", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 208.6750030517578, "t": 508.5134582519531, "r": 233.69125366210938, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "941123", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 243.65292358398438, "t": 508.5134582519531, "r": 264.49981689453125, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "99816", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 274.46148681640625, "t": 508.5134582519531, "r": 295.3083801269531, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "66531", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 305.27301025390625, "t": 508.5134582519531, "r": 324.9811706542969, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "82-83", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 334.9428405761719, "t": 508.5134582519531, "r": 354.6510009765625, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 6, "end_col_offset_idx": 7, "text": "71-74", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 364.6126708984375, "t": 508.5134582519531, "r": 384.3208312988281, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 7, "end_col_offset_idx": 8, "text": "79-81", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 398.4518737792969, "t": 508.5134582519531, "r": 418.1600341796875, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 8, "end_col_offset_idx": 9, "text": "89-94", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 428.1217041015625, "t": 508.5134582519531, "r": 447.8298645019531, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 9, "end_col_offset_idx": 10, "text": "86-91", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 457.8005065917969, "t": 508.5134582519531, "r": 477.5086669921875, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 10, "end_col_offset_idx": 11, "text": "71-76", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 487.4703369140625, "t": 508.5134582519531, "r": 507.1784973144531, "b": 500.1388244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 11, "end_col_offset_idx": 12, "text": "68-85", "column_header": false, "row_header": false, "row_section": false}]]}}, {"self_ref": "#/tables/1", "parent": {"cref": "#/body"}, "children": [], "label": "table", "prov": [{"page_no": 6, "bbox": {"l": 62.02753829956055, "t": 596.3199462890625, "r": 285.78955078125, "b": 440.3381042480469, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 0]}], "captions": [{"cref": "#/texts/432"}], "references": [], "footnotes": [], "image": null, "data": {"table_cells": [{"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 132.36500549316406, "t": 594.0264892578125, "r": 157.99098205566406, "b": 585.65185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 2, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 2, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "human", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 173.5050048828125, "t": 594.0264892578125, "r": 204.618408203125, "b": 585.65185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 2, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 2, "end_col_offset_idx": 4, "text": "MRCNN", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 220.13027954101562, "t": 594.0264892578125, "r": 248.069580078125, "b": 585.65185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "FRCNN", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 258.03125, "t": 594.0264892578125, "r": 280.1782531738281, "b": 585.65185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "YOLO", "column_header": true, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 168.39300537109375, "t": 583.0674438476562, "r": 181.9950408935547, "b": 574.6928100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "R50", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 192.39605712890625, "t": 583.0674438476562, "r": 210.16746520996094, "b": 574.6928100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "R101", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 225.2130889892578, "t": 583.0674438476562, "r": 242.9844970703125, "b": 574.6928100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "R101", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 260.5137939453125, "t": 583.0674438476562, "r": 277.702392578125, "b": 574.6928100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "v5x6", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 67.66300201416016, "t": 571.71044921875, "r": 96.8486328125, "b": 563.3358154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Caption", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 571.71044921875, "r": 155.0321502685547, "b": 563.3358154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "84-89", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 571.71044921875, "r": 182.43472290039062, "b": 563.3358154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "68.4", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 571.71044921875, "r": 208.52694702148438, "b": 563.3358154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "71.5", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 571.71044921875, "r": 241.34396362304688, "b": 563.3358154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "70.1", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 571.71044921875, "r": 276.3487854003906, "b": 563.3358154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "77.7", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 67.66300201416016, "t": 560.75146484375, "r": 100.16619873046875, "b": 552.3768310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Footnote", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 560.75146484375, "r": 155.0321502685547, "b": 552.3768310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "83-91", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 560.75146484375, "r": 182.43472290039062, "b": 552.3768310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "70.9", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 560.75146484375, "r": 208.52694702148438, "b": 552.3768310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "71.8", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 560.75146484375, "r": 241.34396362304688, "b": 552.3768310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "73.7", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 560.75146484375, "r": 276.3487854003906, "b": 552.3768310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "77.2", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 67.66300201416016, "t": 549.79248046875, "r": 98.1756591796875, "b": 541.4178466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Formula", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 549.79248046875, "r": 155.0321502685547, "b": 541.4178466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "83-85", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 549.79248046875, "r": 182.43472290039062, "b": 541.4178466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "60.1", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 549.79248046875, "r": 208.52694702148438, "b": 541.4178466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "63.4", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 549.79248046875, "r": 241.34396362304688, "b": 541.4178466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "63.5", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 549.79248046875, "r": 276.3487854003906, "b": 541.4178466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "66.2", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 67.66300201416016, "t": 538.8334350585938, "r": 100.54279327392578, "b": 530.4588012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "List-item", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 538.8334350585938, "r": 155.0321502685547, "b": 530.4588012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "87-88", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 538.8334350585938, "r": 182.43472290039062, "b": 530.4588012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "81.2", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 538.8334350585938, "r": 208.52694702148438, "b": 530.4588012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "80.8", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 538.8334350585938, "r": 241.34396362304688, "b": 530.4588012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "81.0", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 538.8334350585938, "r": 276.3487854003906, "b": 530.4588012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "86.2", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 67.66300201416016, "t": 527.8744506835938, "r": 110.19064331054688, "b": 519.4998168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Page-footer", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 527.8744506835938, "r": 155.0321502685547, "b": 519.4998168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "93-94", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 527.8744506835938, "r": 182.43472290039062, "b": 519.4998168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "61.6", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 527.8744506835938, "r": 208.52694702148438, "b": 519.4998168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "59.3", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 527.8744506835938, "r": 241.34396362304688, "b": 519.4998168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "58.9", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 527.8744506835938, "r": 276.3487854003906, "b": 519.4998168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "61.1", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 67.66300201416016, "t": 516.9154663085938, "r": 112.94332122802734, "b": 508.54083251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Page-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 516.9154663085938, "r": 155.0321502685547, "b": 508.54083251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "85-89", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 516.9154663085938, "r": 182.43472290039062, "b": 508.54083251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "71.9", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 516.9154663085938, "r": 208.52694702148438, "b": 508.54083251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "70.0", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 516.9154663085938, "r": 241.34396362304688, "b": 508.54083251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "72.0", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 516.9154663085938, "r": 276.3487854003906, "b": 508.54083251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "67.9", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 67.66300201416016, "t": 505.9564514160156, "r": 93.64762878417969, "b": 497.5818176269531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Picture", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 505.9564514160156, "r": 155.0321502685547, "b": 497.5818176269531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "69-71", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 505.9564514160156, "r": 182.43472290039062, "b": 497.5818176269531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "71.7", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 505.9564514160156, "r": 208.52694702148438, "b": 497.5818176269531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "72.7", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 505.9564514160156, "r": 241.34396362304688, "b": 497.5818176269531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "72.0", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 505.9564514160156, "r": 276.3487854003906, "b": 497.5818176269531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "77.1", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 67.66300201416016, "t": 494.9974670410156, "r": 122.40287780761719, "b": 486.6228332519531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Section-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 494.9974670410156, "r": 155.0321502685547, "b": 486.6228332519531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "83-84", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 494.9974670410156, "r": 182.43472290039062, "b": 486.6228332519531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "67.6", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 494.9974670410156, "r": 208.52694702148438, "b": 486.6228332519531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "69.3", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 494.9974670410156, "r": 241.34396362304688, "b": 486.6228332519531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "68.4", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 494.9974670410156, "r": 276.3487854003906, "b": 486.6228332519531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "74.6", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 67.66300201416016, "t": 484.0384521484375, "r": 87.46977996826172, "b": 475.663818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Table", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 484.0384521484375, "r": 155.0321502685547, "b": 475.663818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "77-81", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 484.0384521484375, "r": 182.43472290039062, "b": 475.663818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "82.2", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 484.0384521484375, "r": 208.52694702148438, "b": 475.663818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "82.9", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 484.0384521484375, "r": 241.34396362304688, "b": 475.663818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "82.2", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 484.0384521484375, "r": 276.3487854003906, "b": 475.663818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "86.3", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 67.66300201416016, "t": 473.0804748535156, "r": 83.62319946289062, "b": 464.7058410644531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Text", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 473.0804748535156, "r": 155.0321502685547, "b": 464.7058410644531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "84-86", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 473.0804748535156, "r": 182.43472290039062, "b": 464.7058410644531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "84.6", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 473.0804748535156, "r": 208.52694702148438, "b": 464.7058410644531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "85.8", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 473.0804748535156, "r": 241.34396362304688, "b": 464.7058410644531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "85.4", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 473.0804748535156, "r": 276.3487854003906, "b": 464.7058410644531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "88.1", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 67.66300201416016, "t": 462.1214599609375, "r": 84.65432739257812, "b": 453.746826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Title", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 462.1214599609375, "r": 155.0321502685547, "b": 453.746826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "60-72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 462.1214599609375, "r": 182.43472290039062, "b": 453.746826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "76.7", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 462.1214599609375, "r": 208.52694702148438, "b": 453.746826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "80.4", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 462.1214599609375, "r": 241.34396362304688, "b": 453.746826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "79.9", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 462.1214599609375, "r": 276.3487854003906, "b": 453.746826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "82.7", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 67.66300201416016, "t": 450.7634582519531, "r": 78.62890625, "b": 442.3888244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "All", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 450.7634582519531, "r": 155.0321502685547, "b": 442.3888244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "82-83", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 450.7634582519531, "r": 182.43472290039062, "b": 442.3888244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "72.4", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 450.7634582519531, "r": 208.52694702148438, "b": 442.3888244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "73.5", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 450.7634582519531, "r": 241.34396362304688, "b": 442.3888244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "73.4", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 450.7634582519531, "r": 276.3487854003906, "b": 442.3888244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "76.8", "column_header": false, "row_header": false, "row_section": false}], "num_rows": 14, "num_cols": 6, "grid": [[{"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 132.36500549316406, "t": 594.0264892578125, "r": 157.99098205566406, "b": 585.65185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 2, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 2, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "human", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 173.5050048828125, "t": 594.0264892578125, "r": 204.618408203125, "b": 585.65185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 2, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 2, "end_col_offset_idx": 4, "text": "MRCNN", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 173.5050048828125, "t": 594.0264892578125, "r": 204.618408203125, "b": 585.65185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 2, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 2, "end_col_offset_idx": 4, "text": "MRCNN", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 220.13027954101562, "t": 594.0264892578125, "r": 248.069580078125, "b": 585.65185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "FRCNN", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 258.03125, "t": 594.0264892578125, "r": 280.1782531738281, "b": 585.65185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "YOLO", "column_header": true, "row_header": false, "row_section": false}], [{"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 132.36500549316406, "t": 594.0264892578125, "r": 157.99098205566406, "b": 585.65185546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 2, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 2, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "human", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 168.39300537109375, "t": 583.0674438476562, "r": 181.9950408935547, "b": 574.6928100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "R50", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 192.39605712890625, "t": 583.0674438476562, "r": 210.16746520996094, "b": 574.6928100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "R101", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 225.2130889892578, "t": 583.0674438476562, "r": 242.9844970703125, "b": 574.6928100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "R101", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 260.5137939453125, "t": 583.0674438476562, "r": 277.702392578125, "b": 574.6928100585938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "v5x6", "column_header": true, "row_header": false, "row_section": false}], [{"bbox": {"l": 67.66300201416016, "t": 571.71044921875, "r": 96.8486328125, "b": 563.3358154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Caption", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 571.71044921875, "r": 155.0321502685547, "b": 563.3358154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "84-89", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 571.71044921875, "r": 182.43472290039062, "b": 563.3358154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "68.4", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 571.71044921875, "r": 208.52694702148438, "b": 563.3358154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "71.5", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 571.71044921875, "r": 241.34396362304688, "b": 563.3358154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "70.1", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 571.71044921875, "r": 276.3487854003906, "b": 563.3358154296875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "77.7", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 67.66300201416016, "t": 560.75146484375, "r": 100.16619873046875, "b": 552.3768310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Footnote", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 560.75146484375, "r": 155.0321502685547, "b": 552.3768310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "83-91", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 560.75146484375, "r": 182.43472290039062, "b": 552.3768310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "70.9", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 560.75146484375, "r": 208.52694702148438, "b": 552.3768310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "71.8", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 560.75146484375, "r": 241.34396362304688, "b": 552.3768310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "73.7", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 560.75146484375, "r": 276.3487854003906, "b": 552.3768310546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "77.2", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 67.66300201416016, "t": 549.79248046875, "r": 98.1756591796875, "b": 541.4178466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Formula", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 549.79248046875, "r": 155.0321502685547, "b": 541.4178466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "83-85", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 549.79248046875, "r": 182.43472290039062, "b": 541.4178466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "60.1", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 549.79248046875, "r": 208.52694702148438, "b": 541.4178466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "63.4", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 549.79248046875, "r": 241.34396362304688, "b": 541.4178466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "63.5", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 549.79248046875, "r": 276.3487854003906, "b": 541.4178466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "66.2", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 67.66300201416016, "t": 538.8334350585938, "r": 100.54279327392578, "b": 530.4588012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "List-item", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 538.8334350585938, "r": 155.0321502685547, "b": 530.4588012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "87-88", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 538.8334350585938, "r": 182.43472290039062, "b": 530.4588012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "81.2", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 538.8334350585938, "r": 208.52694702148438, "b": 530.4588012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "80.8", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 538.8334350585938, "r": 241.34396362304688, "b": 530.4588012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "81.0", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 538.8334350585938, "r": 276.3487854003906, "b": 530.4588012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "86.2", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 67.66300201416016, "t": 527.8744506835938, "r": 110.19064331054688, "b": 519.4998168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Page-footer", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 527.8744506835938, "r": 155.0321502685547, "b": 519.4998168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "93-94", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 527.8744506835938, "r": 182.43472290039062, "b": 519.4998168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "61.6", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 527.8744506835938, "r": 208.52694702148438, "b": 519.4998168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "59.3", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 527.8744506835938, "r": 241.34396362304688, "b": 519.4998168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "58.9", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 527.8744506835938, "r": 276.3487854003906, "b": 519.4998168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "61.1", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 67.66300201416016, "t": 516.9154663085938, "r": 112.94332122802734, "b": 508.54083251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Page-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 516.9154663085938, "r": 155.0321502685547, "b": 508.54083251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "85-89", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 516.9154663085938, "r": 182.43472290039062, "b": 508.54083251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "71.9", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 516.9154663085938, "r": 208.52694702148438, "b": 508.54083251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "70.0", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 516.9154663085938, "r": 241.34396362304688, "b": 508.54083251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "72.0", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 516.9154663085938, "r": 276.3487854003906, "b": 508.54083251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "67.9", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 67.66300201416016, "t": 505.9564514160156, "r": 93.64762878417969, "b": 497.5818176269531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Picture", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 505.9564514160156, "r": 155.0321502685547, "b": 497.5818176269531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "69-71", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 505.9564514160156, "r": 182.43472290039062, "b": 497.5818176269531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "71.7", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 505.9564514160156, "r": 208.52694702148438, "b": 497.5818176269531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "72.7", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 505.9564514160156, "r": 241.34396362304688, "b": 497.5818176269531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "72.0", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 505.9564514160156, "r": 276.3487854003906, "b": 497.5818176269531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "77.1", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 67.66300201416016, "t": 494.9974670410156, "r": 122.40287780761719, "b": 486.6228332519531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Section-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 494.9974670410156, "r": 155.0321502685547, "b": 486.6228332519531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "83-84", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 494.9974670410156, "r": 182.43472290039062, "b": 486.6228332519531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "67.6", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 494.9974670410156, "r": 208.52694702148438, "b": 486.6228332519531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "69.3", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 494.9974670410156, "r": 241.34396362304688, "b": 486.6228332519531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "68.4", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 494.9974670410156, "r": 276.3487854003906, "b": 486.6228332519531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "74.6", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 67.66300201416016, "t": 484.0384521484375, "r": 87.46977996826172, "b": 475.663818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Table", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 484.0384521484375, "r": 155.0321502685547, "b": 475.663818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "77-81", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 484.0384521484375, "r": 182.43472290039062, "b": 475.663818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "82.2", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 484.0384521484375, "r": 208.52694702148438, "b": 475.663818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "82.9", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 484.0384521484375, "r": 241.34396362304688, "b": 475.663818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "82.2", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 484.0384521484375, "r": 276.3487854003906, "b": 475.663818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "86.3", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 67.66300201416016, "t": 473.0804748535156, "r": 83.62319946289062, "b": 464.7058410644531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Text", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 473.0804748535156, "r": 155.0321502685547, "b": 464.7058410644531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "84-86", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 473.0804748535156, "r": 182.43472290039062, "b": 464.7058410644531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "84.6", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 473.0804748535156, "r": 208.52694702148438, "b": 464.7058410644531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "85.8", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 473.0804748535156, "r": 241.34396362304688, "b": 464.7058410644531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "85.4", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 473.0804748535156, "r": 276.3487854003906, "b": 464.7058410644531, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "88.1", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 67.66300201416016, "t": 462.1214599609375, "r": 84.65432739257812, "b": 453.746826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Title", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 462.1214599609375, "r": 155.0321502685547, "b": 453.746826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "60-72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 462.1214599609375, "r": 182.43472290039062, "b": 453.746826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "76.7", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 462.1214599609375, "r": 208.52694702148438, "b": 453.746826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "80.4", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 462.1214599609375, "r": 241.34396362304688, "b": 453.746826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "79.9", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 462.1214599609375, "r": 276.3487854003906, "b": 453.746826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "82.7", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 67.66300201416016, "t": 450.7634582519531, "r": 78.62890625, "b": 442.3888244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "All", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 135.32400512695312, "t": 450.7634582519531, "r": 155.0321502685547, "b": 442.3888244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "82-83", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 167.95399475097656, "t": 450.7634582519531, "r": 182.43472290039062, "b": 442.3888244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "72.4", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 194.04620361328125, "t": 450.7634582519531, "r": 208.52694702148438, "b": 442.3888244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "73.5", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 226.8632354736328, "t": 450.7634582519531, "r": 241.34396362304688, "b": 442.3888244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "73.4", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 261.8680419921875, "t": 450.7634582519531, "r": 276.3487854003906, "b": 442.3888244628906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 5, "end_col_offset_idx": 6, "text": "76.8", "column_header": false, "row_header": false, "row_section": false}]]}}, {"self_ref": "#/tables/2", "parent": {"cref": "#/body"}, "children": [], "label": "table", "prov": [{"page_no": 7, "bbox": {"l": 80.35525512695312, "t": 641.063720703125, "r": 267.0082092285156, "b": 496.5545349121094, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 0]}], "captions": [{"cref": "#/texts/466"}], "references": [], "footnotes": [], "image": null, "data": {"table_cells": [{"bbox": {"l": 86.37200164794922, "t": 638.8994750976562, "r": 129.4645233154297, "b": 630.5248413085938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Class-count", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 638.8994750976562, "r": 159.41275024414062, "b": 630.5248413085938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "11", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 179.3181610107422, "t": 638.8994750976562, "r": 183.48753356933594, "b": 630.5248413085938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "6", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 213.33668518066406, "t": 638.8994750976562, "r": 217.5060577392578, "b": 630.5248413085938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "5", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 247.35520935058594, "t": 638.8994750976562, "r": 251.5245819091797, "b": 630.5248413085938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "4", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 86.37200164794922, "t": 627.54248046875, "r": 115.55763244628906, "b": 619.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Caption", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 627.54248046875, "r": 159.41275024414062, "b": 619.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "68", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 173.42723083496094, "t": 627.54248046875, "r": 189.38742065429688, "b": 619.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "Text", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 207.4457550048828, "t": 627.54248046875, "r": 223.40594482421875, "b": 619.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "Text", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 241.4642791748047, "t": 627.54248046875, "r": 257.4244689941406, "b": 619.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "Text", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 86.37200164794922, "t": 616.5834350585938, "r": 118.87519836425781, "b": 608.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Footnote", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 616.5834350585938, "r": 159.41275024414062, "b": 608.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "71", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 173.42723083496094, "t": 616.5834350585938, "r": 189.38742065429688, "b": 608.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "Text", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 207.4457550048828, "t": 616.5834350585938, "r": 223.40594482421875, "b": 608.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "Text", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 241.4642791748047, "t": 616.5834350585938, "r": 257.4244689941406, "b": 608.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "Text", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 86.37200164794922, "t": 605.6244506835938, "r": 116.88465881347656, "b": 597.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Formula", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 605.6244506835938, "r": 159.41275024414062, "b": 597.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "60", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 173.42723083496094, "t": 605.6244506835938, "r": 189.38742065429688, "b": 597.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "Text", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 207.4457550048828, "t": 605.6244506835938, "r": 223.40594482421875, "b": 597.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "Text", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 241.4642791748047, "t": 605.6244506835938, "r": 257.4244689941406, "b": 597.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "Text", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 86.37200164794922, "t": 594.6654663085938, "r": 119.25179290771484, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "List-item", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 594.6654663085938, "r": 159.41275024414062, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "81", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 173.42723083496094, "t": 594.6654663085938, "r": 189.38742065429688, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "Text", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 211.2564697265625, "t": 594.6654663085938, "r": 219.59521484375, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 241.46426391601562, "t": 594.6654663085938, "r": 257.4244689941406, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "Text", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 86.37200164794922, "t": 583.7064819335938, "r": 128.89964294433594, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Page-footer", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 583.7064819335938, "r": 159.41275024414062, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "62", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 177.23794555664062, "t": 583.7064819335938, "r": 185.57669067382812, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "62", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 213.9105224609375, "t": 583.7064819335938, "r": 216.941162109375, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "-", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 247.92904663085938, "t": 583.7064819335938, "r": 250.95968627929688, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "-", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 86.37200164794922, "t": 572.7474365234375, "r": 131.65231323242188, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Page-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 572.7474365234375, "r": 159.41275024414062, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 177.23794555664062, "t": 572.7474365234375, "r": 185.57669067382812, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "68", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 213.9105224609375, "t": 572.7474365234375, "r": 216.941162109375, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "-", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 247.92904663085938, "t": 572.7474365234375, "r": 250.95968627929688, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "-", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 86.37200164794922, "t": 561.7884521484375, "r": 112.35662841796875, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Picture", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 561.7884521484375, "r": 159.41275024414062, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 177.23794555664062, "t": 561.7884521484375, "r": 185.57669067382812, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 211.25645446777344, "t": 561.7884521484375, "r": 219.59519958496094, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 245.27496337890625, "t": 561.7884521484375, "r": 253.61370849609375, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 86.37200164794922, "t": 550.8304443359375, "r": 141.11187744140625, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Section-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 550.8304443359375, "r": 159.41275024414062, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "68", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 177.23794555664062, "t": 550.8304443359375, "r": 185.57669067382812, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "67", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 211.25645446777344, "t": 550.8304443359375, "r": 219.59519958496094, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "69", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 245.27496337890625, "t": 550.8304443359375, "r": 253.61370849609375, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "68", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 86.37200164794922, "t": 539.8714599609375, "r": 106.17877960205078, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Table", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 539.8714599609375, "r": 159.41275024414062, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 177.23794555664062, "t": 539.8714599609375, "r": 185.57669067382812, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "83", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 211.25645446777344, "t": 539.8714599609375, "r": 219.59519958496094, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 245.27496337890625, "t": 539.8714599609375, "r": 253.61370849609375, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 86.37200164794922, "t": 528.9124755859375, "r": 102.33219909667969, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Text", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 528.9124755859375, "r": 159.41275024414062, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "85", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 177.23794555664062, "t": 528.9124755859375, "r": 185.57669067382812, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "84", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 211.25645446777344, "t": 528.9124755859375, "r": 219.59519958496094, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "84", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 245.27496337890625, "t": 528.9124755859375, "r": 253.61370849609375, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "84", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 86.37200164794922, "t": 517.9534301757812, "r": 103.36332702636719, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Title", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 517.9534301757812, "r": 159.41275024414062, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "77", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 169.37442016601562, "t": 517.9534301757812, "r": 193.4312744140625, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "Sec.-h.", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 203.3929443359375, "t": 517.9534301757812, "r": 227.44979858398438, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "Sec.-h.", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 237.41146850585938, "t": 517.9534301757812, "r": 261.46832275390625, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "Sec.-h.", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 86.37200164794922, "t": 506.595458984375, "r": 113.3160171508789, "b": 498.2208251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Overall", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 506.595458984375, "r": 159.41275024414062, "b": 498.2208251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 177.23794555664062, "t": 506.595458984375, "r": 185.57669067382812, "b": 498.2208251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "73", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 211.25645446777344, "t": 506.595458984375, "r": 219.59519958496094, "b": 498.2208251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "78", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 245.27496337890625, "t": 506.595458984375, "r": 253.61370849609375, "b": 498.2208251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "77", "column_header": false, "row_header": false, "row_section": false}], "num_rows": 13, "num_cols": 5, "grid": [[{"bbox": {"l": 86.37200164794922, "t": 638.8994750976562, "r": 129.4645233154297, "b": 630.5248413085938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Class-count", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 638.8994750976562, "r": 159.41275024414062, "b": 630.5248413085938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "11", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 179.3181610107422, "t": 638.8994750976562, "r": 183.48753356933594, "b": 630.5248413085938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "6", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 213.33668518066406, "t": 638.8994750976562, "r": 217.5060577392578, "b": 630.5248413085938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "5", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 247.35520935058594, "t": 638.8994750976562, "r": 251.5245819091797, "b": 630.5248413085938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "4", "column_header": true, "row_header": false, "row_section": false}], [{"bbox": {"l": 86.37200164794922, "t": 627.54248046875, "r": 115.55763244628906, "b": 619.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Caption", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 627.54248046875, "r": 159.41275024414062, "b": 619.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "68", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 173.42723083496094, "t": 627.54248046875, "r": 189.38742065429688, "b": 619.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "Text", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 207.4457550048828, "t": 627.54248046875, "r": 223.40594482421875, "b": 619.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "Text", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 241.4642791748047, "t": 627.54248046875, "r": 257.4244689941406, "b": 619.1678466796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "Text", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 86.37200164794922, "t": 616.5834350585938, "r": 118.87519836425781, "b": 608.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Footnote", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 616.5834350585938, "r": 159.41275024414062, "b": 608.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "71", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 173.42723083496094, "t": 616.5834350585938, "r": 189.38742065429688, "b": 608.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "Text", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 207.4457550048828, "t": 616.5834350585938, "r": 223.40594482421875, "b": 608.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "Text", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 241.4642791748047, "t": 616.5834350585938, "r": 257.4244689941406, "b": 608.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "Text", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 86.37200164794922, "t": 605.6244506835938, "r": 116.88465881347656, "b": 597.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Formula", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 605.6244506835938, "r": 159.41275024414062, "b": 597.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "60", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 173.42723083496094, "t": 605.6244506835938, "r": 189.38742065429688, "b": 597.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "Text", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 207.4457550048828, "t": 605.6244506835938, "r": 223.40594482421875, "b": 597.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "Text", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 241.4642791748047, "t": 605.6244506835938, "r": 257.4244689941406, "b": 597.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "Text", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 86.37200164794922, "t": 594.6654663085938, "r": 119.25179290771484, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "List-item", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 594.6654663085938, "r": 159.41275024414062, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "81", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 173.42723083496094, "t": 594.6654663085938, "r": 189.38742065429688, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "Text", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 211.2564697265625, "t": 594.6654663085938, "r": 219.59521484375, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 241.46426391601562, "t": 594.6654663085938, "r": 257.4244689941406, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "Text", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 86.37200164794922, "t": 583.7064819335938, "r": 128.89964294433594, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Page-footer", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 583.7064819335938, "r": 159.41275024414062, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "62", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 177.23794555664062, "t": 583.7064819335938, "r": 185.57669067382812, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "62", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 213.9105224609375, "t": 583.7064819335938, "r": 216.941162109375, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "-", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 247.92904663085938, "t": 583.7064819335938, "r": 250.95968627929688, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "-", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 86.37200164794922, "t": 572.7474365234375, "r": 131.65231323242188, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Page-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 572.7474365234375, "r": 159.41275024414062, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 177.23794555664062, "t": 572.7474365234375, "r": 185.57669067382812, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "68", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 213.9105224609375, "t": 572.7474365234375, "r": 216.941162109375, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "-", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 247.92904663085938, "t": 572.7474365234375, "r": 250.95968627929688, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "-", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 86.37200164794922, "t": 561.7884521484375, "r": 112.35662841796875, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Picture", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 561.7884521484375, "r": 159.41275024414062, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 177.23794555664062, "t": 561.7884521484375, "r": 185.57669067382812, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 211.25645446777344, "t": 561.7884521484375, "r": 219.59519958496094, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 245.27496337890625, "t": 561.7884521484375, "r": 253.61370849609375, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "72", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 86.37200164794922, "t": 550.8304443359375, "r": 141.11187744140625, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Section-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 550.8304443359375, "r": 159.41275024414062, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "68", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 177.23794555664062, "t": 550.8304443359375, "r": 185.57669067382812, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "67", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 211.25645446777344, "t": 550.8304443359375, "r": 219.59519958496094, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "69", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 245.27496337890625, "t": 550.8304443359375, "r": 253.61370849609375, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "68", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 86.37200164794922, "t": 539.8714599609375, "r": 106.17877960205078, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Table", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 539.8714599609375, "r": 159.41275024414062, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 177.23794555664062, "t": 539.8714599609375, "r": 185.57669067382812, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "83", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 211.25645446777344, "t": 539.8714599609375, "r": 219.59519958496094, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 245.27496337890625, "t": 539.8714599609375, "r": 253.61370849609375, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "82", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 86.37200164794922, "t": 528.9124755859375, "r": 102.33219909667969, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Text", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 528.9124755859375, "r": 159.41275024414062, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "85", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 177.23794555664062, "t": 528.9124755859375, "r": 185.57669067382812, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "84", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 211.25645446777344, "t": 528.9124755859375, "r": 219.59519958496094, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "84", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 245.27496337890625, "t": 528.9124755859375, "r": 253.61370849609375, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "84", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 86.37200164794922, "t": 517.9534301757812, "r": 103.36332702636719, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Title", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 517.9534301757812, "r": 159.41275024414062, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "77", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 169.37442016601562, "t": 517.9534301757812, "r": 193.4312744140625, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "Sec.-h.", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 203.3929443359375, "t": 517.9534301757812, "r": 227.44979858398438, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "Sec.-h.", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 237.41146850585938, "t": 517.9534301757812, "r": 261.46832275390625, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "Sec.-h.", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 86.37200164794922, "t": 506.595458984375, "r": 113.3160171508789, "b": 498.2208251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Overall", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 151.07400512695312, "t": 506.595458984375, "r": 159.41275024414062, "b": 498.2208251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 177.23794555664062, "t": 506.595458984375, "r": 185.57669067382812, "b": 498.2208251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "73", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 211.25645446777344, "t": 506.595458984375, "r": 219.59519958496094, "b": 498.2208251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "78", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 245.27496337890625, "t": 506.595458984375, "r": 253.61370849609375, "b": 498.2208251953125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "77", "column_header": false, "row_header": false, "row_section": false}]]}}, {"self_ref": "#/tables/3", "parent": {"cref": "#/body"}, "children": [], "label": "table", "prov": [{"page_no": 7, "bbox": {"l": 352.97747802734375, "t": 641.208740234375, "r": 522.9158935546875, "b": 485.7341613769531, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 0]}], "captions": [], "references": [], "footnotes": [], "image": null, "data": {"table_cells": [{"bbox": {"l": 358.6390075683594, "t": 638.8994750976562, "r": 401.7315368652344, "b": 630.5248413085938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Class-count", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 440.2250061035156, "t": 638.8994750976562, "r": 448.5637512207031, "b": 630.5248413085938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 2, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 1, "end_col_offset_idx": 3, "text": "11", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 494.3800048828125, "t": 638.8994750976562, "r": 498.54937744140625, "b": 630.5248413085938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 2, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 3, "end_col_offset_idx": 5, "text": "5", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 358.6390075683594, "t": 627.9404907226562, "r": 375.27166748046875, "b": 619.5658569335938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Split", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 423.34100341796875, "t": 627.9404907226562, "r": 438.0458984375, "b": 619.5658569335938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Doc", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 448.007568359375, "t": 627.9404907226562, "r": 465.44720458984375, "b": 619.5658569335938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "Page", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 475.4110107421875, "t": 627.9404907226562, "r": 490.11590576171875, "b": 619.5658569335938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "Doc", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 500.07757568359375, "t": 627.9404907226562, "r": 517.5172119140625, "b": 619.5658569335938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "Page", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 358.6390075683594, "t": 616.5834350585938, "r": 387.82464599609375, "b": 608.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Caption", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 616.5834350585938, "r": 434.86273193359375, "b": 608.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "68", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 616.5834350585938, "r": 460.9011535644531, "b": 608.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "83", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 358.6390075683594, "t": 605.6244506835938, "r": 391.1422119140625, "b": 597.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Footnote", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 605.6244506835938, "r": 434.86273193359375, "b": 597.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "71", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 605.6244506835938, "r": 460.9011535644531, "b": 597.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "84", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 358.6390075683594, "t": 594.6654663085938, "r": 389.15167236328125, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Formula", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 594.6654663085938, "r": 434.86273193359375, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "60", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 594.6654663085938, "r": 460.9011535644531, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "66", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 358.6390075683594, "t": 583.7064819335938, "r": 391.518798828125, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "List-item", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 583.7064819335938, "r": 434.86273193359375, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "81", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 583.7064819335938, "r": 460.9011535644531, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "88", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 478.593994140625, "t": 583.7064819335938, "r": 486.9327392578125, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 504.6324157714844, "t": 583.7064819335938, "r": 512.97119140625, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "88", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 358.6390075683594, "t": 572.7474365234375, "r": 401.1666564941406, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Page-footer", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 572.7474365234375, "r": 434.86273193359375, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "62", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 572.7474365234375, "r": 460.9011535644531, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "89", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 358.6390075683594, "t": 561.7884521484375, "r": 403.9193115234375, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Page-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 561.7884521484375, "r": 434.86273193359375, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 561.7884521484375, "r": 460.9011535644531, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "90", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 358.6390075683594, "t": 550.8304443359375, "r": 384.6236572265625, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Picture", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 550.8304443359375, "r": 434.86273193359375, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 550.8304443359375, "r": 460.9011535644531, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 478.593994140625, "t": 550.8304443359375, "r": 486.9327392578125, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 504.6324157714844, "t": 550.8304443359375, "r": 512.97119140625, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 358.6390075683594, "t": 539.8714599609375, "r": 413.37890625, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Section-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 539.8714599609375, "r": 434.86273193359375, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "68", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 539.8714599609375, "r": 460.9011535644531, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "83", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 478.593994140625, "t": 539.8714599609375, "r": 486.9327392578125, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "69", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 504.6324157714844, "t": 539.8714599609375, "r": 512.97119140625, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "83", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 358.6390075683594, "t": 528.9124755859375, "r": 378.4457702636719, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Table", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 528.9124755859375, "r": 434.86273193359375, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 528.9124755859375, "r": 460.9011535644531, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "89", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 478.593994140625, "t": 528.9124755859375, "r": 486.9327392578125, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 504.6324157714844, "t": 528.9124755859375, "r": 512.97119140625, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "90", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 358.6390075683594, "t": 517.9534301757812, "r": 374.5992126464844, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Text", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 517.9534301757812, "r": 434.86273193359375, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "85", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 517.9534301757812, "r": 460.9011535644531, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "91", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 478.593994140625, "t": 517.9534301757812, "r": 486.9327392578125, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "84", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 504.6324157714844, "t": 517.9534301757812, "r": 512.97119140625, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "90", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 358.6390075683594, "t": 506.9944763183594, "r": 375.6303405761719, "b": 498.6198425292969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Title", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 506.9944763183594, "r": 434.86273193359375, "b": 498.6198425292969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "77", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 506.9944763183594, "r": 460.9011535644531, "b": 498.6198425292969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "81", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 358.6390075683594, "t": 495.637451171875, "r": 369.60491943359375, "b": 487.2628173828125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "All", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 495.637451171875, "r": 434.86273193359375, "b": 487.2628173828125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 495.637451171875, "r": 460.9011535644531, "b": 487.2628173828125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "84", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 478.593994140625, "t": 495.637451171875, "r": 486.9327392578125, "b": 487.2628173828125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "78", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 504.6324157714844, "t": 495.637451171875, "r": 512.97119140625, "b": 487.2628173828125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "87", "column_header": false, "row_header": false, "row_section": false}], "num_rows": 14, "num_cols": 5, "grid": [[{"bbox": {"l": 358.6390075683594, "t": 638.8994750976562, "r": 401.7315368652344, "b": 630.5248413085938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Class-count", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 440.2250061035156, "t": 638.8994750976562, "r": 448.5637512207031, "b": 630.5248413085938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 2, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 1, "end_col_offset_idx": 3, "text": "11", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 440.2250061035156, "t": 638.8994750976562, "r": 448.5637512207031, "b": 630.5248413085938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 2, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 1, "end_col_offset_idx": 3, "text": "11", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 494.3800048828125, "t": 638.8994750976562, "r": 498.54937744140625, "b": 630.5248413085938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 2, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 3, "end_col_offset_idx": 5, "text": "5", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 494.3800048828125, "t": 638.8994750976562, "r": 498.54937744140625, "b": 630.5248413085938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 2, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 3, "end_col_offset_idx": 5, "text": "5", "column_header": true, "row_header": false, "row_section": false}], [{"bbox": {"l": 358.6390075683594, "t": 627.9404907226562, "r": 375.27166748046875, "b": 619.5658569335938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Split", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 423.34100341796875, "t": 627.9404907226562, "r": 438.0458984375, "b": 619.5658569335938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Doc", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 448.007568359375, "t": 627.9404907226562, "r": 465.44720458984375, "b": 619.5658569335938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "Page", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 475.4110107421875, "t": 627.9404907226562, "r": 490.11590576171875, "b": 619.5658569335938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "Doc", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 500.07757568359375, "t": 627.9404907226562, "r": 517.5172119140625, "b": 619.5658569335938, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "Page", "column_header": true, "row_header": false, "row_section": false}], [{"bbox": {"l": 358.6390075683594, "t": 616.5834350585938, "r": 387.82464599609375, "b": 608.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Caption", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 616.5834350585938, "r": 434.86273193359375, "b": 608.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "68", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 616.5834350585938, "r": 460.9011535644531, "b": 608.2088012695312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "83", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 358.6390075683594, "t": 605.6244506835938, "r": 391.1422119140625, "b": 597.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Footnote", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 605.6244506835938, "r": 434.86273193359375, "b": 597.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "71", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 605.6244506835938, "r": 460.9011535644531, "b": 597.2498168945312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "84", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 358.6390075683594, "t": 594.6654663085938, "r": 389.15167236328125, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Formula", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 594.6654663085938, "r": 434.86273193359375, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "60", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 594.6654663085938, "r": 460.9011535644531, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "66", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 358.6390075683594, "t": 583.7064819335938, "r": 391.518798828125, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "List-item", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 583.7064819335938, "r": 434.86273193359375, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "81", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 583.7064819335938, "r": 460.9011535644531, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "88", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 478.593994140625, "t": 583.7064819335938, "r": 486.9327392578125, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 504.6324157714844, "t": 583.7064819335938, "r": 512.97119140625, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "88", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 358.6390075683594, "t": 572.7474365234375, "r": 401.1666564941406, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Page-footer", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 572.7474365234375, "r": 434.86273193359375, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "62", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 572.7474365234375, "r": 460.9011535644531, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "89", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 358.6390075683594, "t": 561.7884521484375, "r": 403.9193115234375, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Page-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 561.7884521484375, "r": 434.86273193359375, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 561.7884521484375, "r": 460.9011535644531, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "90", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 358.6390075683594, "t": 550.8304443359375, "r": 384.6236572265625, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Picture", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 550.8304443359375, "r": 434.86273193359375, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 550.8304443359375, "r": 460.9011535644531, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 478.593994140625, "t": 550.8304443359375, "r": 486.9327392578125, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 504.6324157714844, "t": 550.8304443359375, "r": 512.97119140625, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "82", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 358.6390075683594, "t": 539.8714599609375, "r": 413.37890625, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Section-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 539.8714599609375, "r": 434.86273193359375, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "68", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 539.8714599609375, "r": 460.9011535644531, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "83", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 478.593994140625, "t": 539.8714599609375, "r": 486.9327392578125, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "69", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 504.6324157714844, "t": 539.8714599609375, "r": 512.97119140625, "b": 531.496826171875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "83", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 358.6390075683594, "t": 528.9124755859375, "r": 378.4457702636719, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Table", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 528.9124755859375, "r": 434.86273193359375, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 528.9124755859375, "r": 460.9011535644531, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "89", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 478.593994140625, "t": 528.9124755859375, "r": 486.9327392578125, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 504.6324157714844, "t": 528.9124755859375, "r": 512.97119140625, "b": 520.537841796875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "90", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 358.6390075683594, "t": 517.9534301757812, "r": 374.5992126464844, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Text", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 517.9534301757812, "r": 434.86273193359375, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "85", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 517.9534301757812, "r": 460.9011535644531, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "91", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 478.593994140625, "t": 517.9534301757812, "r": 486.9327392578125, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "84", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 504.6324157714844, "t": 517.9534301757812, "r": 512.97119140625, "b": 509.5788269042969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "90", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 358.6390075683594, "t": 506.9944763183594, "r": 375.6303405761719, "b": 498.6198425292969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Title", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 506.9944763183594, "r": 434.86273193359375, "b": 498.6198425292969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "77", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 506.9944763183594, "r": 460.9011535644531, "b": 498.6198425292969, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "81", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 358.6390075683594, "t": 495.637451171875, "r": 369.60491943359375, "b": 487.2628173828125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "All", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 426.52398681640625, "t": 495.637451171875, "r": 434.86273193359375, "b": 487.2628173828125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 452.5624084472656, "t": 495.637451171875, "r": 460.9011535644531, "b": 487.2628173828125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "84", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 478.593994140625, "t": 495.637451171875, "r": 486.9327392578125, "b": 487.2628173828125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "78", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 504.6324157714844, "t": 495.637451171875, "r": 512.97119140625, "b": 487.2628173828125, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "87", "column_header": false, "row_header": false, "row_section": false}]]}}, {"self_ref": "#/tables/4", "parent": {"cref": "#/body"}, "children": [], "label": "table", "prov": [{"page_no": 8, "bbox": {"l": 72.6590347290039, "t": 619.5191650390625, "r": 274.83465576171875, "b": 452.1459655761719, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 0]}], "captions": [{"cref": "#/texts/477"}], "references": [], "footnotes": [], "image": null, "data": {"table_cells": [{"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 217.74099731445312, "t": 616.9814453125, "r": 256.2606506347656, "b": 608.6068115234375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 3, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 2, "end_col_offset_idx": 5, "text": "Testing on", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 89.9540023803711, "t": 606.0234375, "r": 133.24378967285156, "b": 597.6488037109375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Training on", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 606.0234375, "r": 175.4758758544922, "b": 597.6488037109375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "labels", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 204.69000244140625, "t": 606.0234375, "r": 220.5426025390625, "b": 597.6488037109375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "PLN", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 230.5042724609375, "t": 606.0234375, "r": 242.0619659423828, "b": 597.6488037109375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "DB", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 252.0236358642578, "t": 606.0234375, "r": 269.31085205078125, "b": 597.6488037109375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "DLN", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 78.53099822998047, "t": 572.7474365234375, "r": 142.56005859375, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 5, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 7, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "PubLayNet (PLN)", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 594.6654663085938, "r": 177.9237060546875, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Figure", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 594.6654663085938, "r": 216.78575134277344, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "96", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 232.11830139160156, "t": 594.6654663085938, "r": 240.45704650878906, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "43", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 594.6654663085938, "r": 264.836669921875, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "23", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 583.7064819335938, "r": 194.72674560546875, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Sec-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 583.7064819335938, "r": 216.78575134277344, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "87", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 234.77235412597656, "t": 583.7064819335938, "r": 237.80299377441406, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "-", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 583.7064819335938, "r": 264.836669921875, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "32", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 572.7474365234375, "r": 174.43577575683594, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Table", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 572.7474365234375, "r": 216.78575134277344, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "95", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 232.11830139160156, "t": 572.7474365234375, "r": 240.45704650878906, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "24", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 572.7474365234375, "r": 264.836669921875, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "49", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 561.7884521484375, "r": 170.5891876220703, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Text", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 561.7884521484375, "r": 216.78575134277344, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "96", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 234.77235412597656, "t": 561.7884521484375, "r": 237.80299377441406, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "-", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 561.7884521484375, "r": 264.836669921875, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "42", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 550.8304443359375, "r": 171.27960205078125, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "total", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 550.8304443359375, "r": 216.78575134277344, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "93", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 232.11830139160156, "t": 550.8304443359375, "r": 240.45704650878906, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "34", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 550.8304443359375, "r": 264.836669921875, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "30", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 78.53099822998047, "t": 528.5134887695312, "r": 131.1996307373047, "b": 520.1388549804688, "coord_origin": "BOTTOMLEFT"}, "row_span": 3, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 10, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "DocBank (DB)", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 539.4724731445312, "r": 177.9237060546875, "b": 531.0978393554688, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Figure", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 539.4724731445312, "r": 216.78575134277344, "b": 531.0978393554688, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "77", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 232.11830139160156, "t": 539.4724731445312, "r": 240.45704650878906, "b": 531.0978393554688, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "71", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 539.4724731445312, "r": 264.836669921875, "b": 531.0978393554688, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "31", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 528.5134887695312, "r": 174.43577575683594, "b": 520.1388549804688, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Table", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 528.5134887695312, "r": 216.78575134277344, "b": 520.1388549804688, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "19", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 232.11830139160156, "t": 528.5134887695312, "r": 240.45704650878906, "b": 520.1388549804688, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "65", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 528.5134887695312, "r": 264.836669921875, "b": 520.1388549804688, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "22", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 517.554443359375, "r": 171.27960205078125, "b": 509.1798400878906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "total", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 517.554443359375, "r": 216.78575134277344, "b": 509.1798400878906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "48", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 232.11830139160156, "t": 517.554443359375, "r": 240.45704650878906, "b": 509.1798400878906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "68", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 517.554443359375, "r": 264.836669921875, "b": 509.1798400878906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "27", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 78.53099822998047, "t": 484.2794494628906, "r": 144.6671600341797, "b": 475.9048156738281, "coord_origin": "BOTTOMLEFT"}, "row_span": 5, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 15, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "DocLayNet (DLN)", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 506.19744873046875, "r": 177.9237060546875, "b": 497.82281494140625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Figure", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 506.19744873046875, "r": 216.78575134277344, "b": 497.82281494140625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "67", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 232.11830139160156, "t": 506.19744873046875, "r": 240.45704650878906, "b": 497.82281494140625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "51", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 506.19744873046875, "r": 264.836669921875, "b": 497.82281494140625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "72", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 495.23846435546875, "r": 194.72674560546875, "b": 486.86383056640625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Sec-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 495.23846435546875, "r": 216.78575134277344, "b": 486.86383056640625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "53", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 234.77235412597656, "t": 495.23846435546875, "r": 237.80299377441406, "b": 486.86383056640625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "-", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 495.23846435546875, "r": 264.836669921875, "b": 486.86383056640625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "68", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 484.2794494628906, "r": 174.43577575683594, "b": 475.9048156738281, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Table", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 484.2794494628906, "r": 216.78575134277344, "b": 475.9048156738281, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "87", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 232.11830139160156, "t": 484.2794494628906, "r": 240.45704650878906, "b": 475.9048156738281, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "43", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 484.2794494628906, "r": 264.836669921875, "b": 475.9048156738281, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "82", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 473.3204650878906, "r": 170.5891876220703, "b": 464.9458312988281, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Text", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 473.3204650878906, "r": 216.78575134277344, "b": 464.9458312988281, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "77", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 234.77235412597656, "t": 473.3204650878906, "r": 237.80299377441406, "b": 464.9458312988281, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "-", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 473.3204650878906, "r": 264.836669921875, "b": 464.9458312988281, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "84", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 462.3614501953125, "r": 171.27960205078125, "b": 453.98681640625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 14, "end_row_offset_idx": 15, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "total", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 462.3614501953125, "r": 216.78575134277344, "b": 453.98681640625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 14, "end_row_offset_idx": 15, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "59", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 232.11830139160156, "t": 462.3614501953125, "r": 240.45704650878906, "b": 453.98681640625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 14, "end_row_offset_idx": 15, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "47", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 462.3614501953125, "r": 264.836669921875, "b": 453.98681640625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 14, "end_row_offset_idx": 15, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "78", "column_header": false, "row_header": false, "row_section": false}], "num_rows": 15, "num_cols": 5, "grid": [[{"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": null, "row_span": 1, "col_span": 1, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 217.74099731445312, "t": 616.9814453125, "r": 256.2606506347656, "b": 608.6068115234375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 3, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 2, "end_col_offset_idx": 5, "text": "Testing on", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 217.74099731445312, "t": 616.9814453125, "r": 256.2606506347656, "b": 608.6068115234375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 3, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 2, "end_col_offset_idx": 5, "text": "Testing on", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 217.74099731445312, "t": 616.9814453125, "r": 256.2606506347656, "b": 608.6068115234375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 3, "start_row_offset_idx": 0, "end_row_offset_idx": 1, "start_col_offset_idx": 2, "end_col_offset_idx": 5, "text": "Testing on", "column_header": true, "row_header": false, "row_section": false}], [{"bbox": {"l": 89.9540023803711, "t": 606.0234375, "r": 133.24378967285156, "b": 597.6488037109375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "Training on", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 606.0234375, "r": 175.4758758544922, "b": 597.6488037109375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "labels", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 204.69000244140625, "t": 606.0234375, "r": 220.5426025390625, "b": 597.6488037109375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "PLN", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 230.5042724609375, "t": 606.0234375, "r": 242.0619659423828, "b": 597.6488037109375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "DB", "column_header": true, "row_header": false, "row_section": false}, {"bbox": {"l": 252.0236358642578, "t": 606.0234375, "r": 269.31085205078125, "b": 597.6488037109375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 1, "end_row_offset_idx": 2, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "DLN", "column_header": true, "row_header": false, "row_section": false}], [{"bbox": {"l": 78.53099822998047, "t": 572.7474365234375, "r": 142.56005859375, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 5, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 7, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "PubLayNet (PLN)", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 594.6654663085938, "r": 177.9237060546875, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Figure", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 594.6654663085938, "r": 216.78575134277344, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "96", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 232.11830139160156, "t": 594.6654663085938, "r": 240.45704650878906, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "43", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 594.6654663085938, "r": 264.836669921875, "b": 586.2908325195312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 3, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "23", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 78.53099822998047, "t": 572.7474365234375, "r": 142.56005859375, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 5, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 7, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "PubLayNet (PLN)", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 583.7064819335938, "r": 194.72674560546875, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Sec-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 583.7064819335938, "r": 216.78575134277344, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "87", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 234.77235412597656, "t": 583.7064819335938, "r": 237.80299377441406, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "-", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 583.7064819335938, "r": 264.836669921875, "b": 575.3318481445312, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 3, "end_row_offset_idx": 4, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "32", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 78.53099822998047, "t": 572.7474365234375, "r": 142.56005859375, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 5, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 7, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "PubLayNet (PLN)", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 572.7474365234375, "r": 174.43577575683594, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Table", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 572.7474365234375, "r": 216.78575134277344, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "95", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 232.11830139160156, "t": 572.7474365234375, "r": 240.45704650878906, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "24", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 572.7474365234375, "r": 264.836669921875, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 4, "end_row_offset_idx": 5, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "49", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 78.53099822998047, "t": 572.7474365234375, "r": 142.56005859375, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 5, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 7, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "PubLayNet (PLN)", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 561.7884521484375, "r": 170.5891876220703, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Text", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 561.7884521484375, "r": 216.78575134277344, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "96", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 234.77235412597656, "t": 561.7884521484375, "r": 237.80299377441406, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "-", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 561.7884521484375, "r": 264.836669921875, "b": 553.413818359375, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 5, "end_row_offset_idx": 6, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "42", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 78.53099822998047, "t": 572.7474365234375, "r": 142.56005859375, "b": 564.372802734375, "coord_origin": "BOTTOMLEFT"}, "row_span": 5, "col_span": 1, "start_row_offset_idx": 2, "end_row_offset_idx": 7, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "PubLayNet (PLN)", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 550.8304443359375, "r": 171.27960205078125, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "total", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 550.8304443359375, "r": 216.78575134277344, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "93", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 232.11830139160156, "t": 550.8304443359375, "r": 240.45704650878906, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "34", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 550.8304443359375, "r": 264.836669921875, "b": 542.455810546875, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 6, "end_row_offset_idx": 7, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "30", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 78.53099822998047, "t": 528.5134887695312, "r": 131.1996307373047, "b": 520.1388549804688, "coord_origin": "BOTTOMLEFT"}, "row_span": 3, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 10, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "DocBank (DB)", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 539.4724731445312, "r": 177.9237060546875, "b": 531.0978393554688, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Figure", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 539.4724731445312, "r": 216.78575134277344, "b": 531.0978393554688, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "77", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 232.11830139160156, "t": 539.4724731445312, "r": 240.45704650878906, "b": 531.0978393554688, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "71", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 539.4724731445312, "r": 264.836669921875, "b": 531.0978393554688, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 8, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "31", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 78.53099822998047, "t": 528.5134887695312, "r": 131.1996307373047, "b": 520.1388549804688, "coord_origin": "BOTTOMLEFT"}, "row_span": 3, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 10, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "DocBank (DB)", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 528.5134887695312, "r": 174.43577575683594, "b": 520.1388549804688, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Table", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 528.5134887695312, "r": 216.78575134277344, "b": 520.1388549804688, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "19", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 232.11830139160156, "t": 528.5134887695312, "r": 240.45704650878906, "b": 520.1388549804688, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "65", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 528.5134887695312, "r": 264.836669921875, "b": 520.1388549804688, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 8, "end_row_offset_idx": 9, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "22", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 78.53099822998047, "t": 528.5134887695312, "r": 131.1996307373047, "b": 520.1388549804688, "coord_origin": "BOTTOMLEFT"}, "row_span": 3, "col_span": 1, "start_row_offset_idx": 7, "end_row_offset_idx": 10, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "DocBank (DB)", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 517.554443359375, "r": 171.27960205078125, "b": 509.1798400878906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "total", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 517.554443359375, "r": 216.78575134277344, "b": 509.1798400878906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "48", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 232.11830139160156, "t": 517.554443359375, "r": 240.45704650878906, "b": 509.1798400878906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "68", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 517.554443359375, "r": 264.836669921875, "b": 509.1798400878906, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 9, "end_row_offset_idx": 10, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "27", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 78.53099822998047, "t": 484.2794494628906, "r": 144.6671600341797, "b": 475.9048156738281, "coord_origin": "BOTTOMLEFT"}, "row_span": 5, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 15, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "DocLayNet (DLN)", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 506.19744873046875, "r": 177.9237060546875, "b": 497.82281494140625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Figure", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 506.19744873046875, "r": 216.78575134277344, "b": 497.82281494140625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "67", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 232.11830139160156, "t": 506.19744873046875, "r": 240.45704650878906, "b": 497.82281494140625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "51", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 506.19744873046875, "r": 264.836669921875, "b": 497.82281494140625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 11, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "72", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 78.53099822998047, "t": 484.2794494628906, "r": 144.6671600341797, "b": 475.9048156738281, "coord_origin": "BOTTOMLEFT"}, "row_span": 5, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 15, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "DocLayNet (DLN)", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 495.23846435546875, "r": 194.72674560546875, "b": 486.86383056640625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Sec-header", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 495.23846435546875, "r": 216.78575134277344, "b": 486.86383056640625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "53", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 234.77235412597656, "t": 495.23846435546875, "r": 237.80299377441406, "b": 486.86383056640625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "-", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 495.23846435546875, "r": 264.836669921875, "b": 486.86383056640625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 11, "end_row_offset_idx": 12, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "68", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 78.53099822998047, "t": 484.2794494628906, "r": 144.6671600341797, "b": 475.9048156738281, "coord_origin": "BOTTOMLEFT"}, "row_span": 5, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 15, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "DocLayNet (DLN)", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 484.2794494628906, "r": 174.43577575683594, "b": 475.9048156738281, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Table", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 484.2794494628906, "r": 216.78575134277344, "b": 475.9048156738281, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "87", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 232.11830139160156, "t": 484.2794494628906, "r": 240.45704650878906, "b": 475.9048156738281, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "43", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 484.2794494628906, "r": 264.836669921875, "b": 475.9048156738281, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 12, "end_row_offset_idx": 13, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "82", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 78.53099822998047, "t": 484.2794494628906, "r": 144.6671600341797, "b": 475.9048156738281, "coord_origin": "BOTTOMLEFT"}, "row_span": 5, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 15, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "DocLayNet (DLN)", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 473.3204650878906, "r": 170.5891876220703, "b": 464.9458312988281, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "Text", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 473.3204650878906, "r": 216.78575134277344, "b": 464.9458312988281, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "77", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 234.77235412597656, "t": 473.3204650878906, "r": 237.80299377441406, "b": 464.9458312988281, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "-", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 473.3204650878906, "r": 264.836669921875, "b": 464.9458312988281, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 13, "end_row_offset_idx": 14, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "84", "column_header": false, "row_header": false, "row_section": false}], [{"bbox": {"l": 78.53099822998047, "t": 484.2794494628906, "r": 144.6671600341797, "b": 475.9048156738281, "coord_origin": "BOTTOMLEFT"}, "row_span": 5, "col_span": 1, "start_row_offset_idx": 10, "end_row_offset_idx": 15, "start_col_offset_idx": 0, "end_col_offset_idx": 1, "text": "DocLayNet (DLN)", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 154.62899780273438, "t": 462.3614501953125, "r": 171.27960205078125, "b": 453.98681640625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 14, "end_row_offset_idx": 15, "start_col_offset_idx": 1, "end_col_offset_idx": 2, "text": "total", "column_header": false, "row_header": true, "row_section": false}, {"bbox": {"l": 208.44700622558594, "t": 462.3614501953125, "r": 216.78575134277344, "b": 453.98681640625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 14, "end_row_offset_idx": 15, "start_col_offset_idx": 2, "end_col_offset_idx": 3, "text": "59", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 232.11830139160156, "t": 462.3614501953125, "r": 240.45704650878906, "b": 453.98681640625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 14, "end_row_offset_idx": 15, "start_col_offset_idx": 3, "end_col_offset_idx": 4, "text": "47", "column_header": false, "row_header": false, "row_section": false}, {"bbox": {"l": 256.4979248046875, "t": 462.3614501953125, "r": 264.836669921875, "b": 453.98681640625, "coord_origin": "BOTTOMLEFT"}, "row_span": 1, "col_span": 1, "start_row_offset_idx": 14, "end_row_offset_idx": 15, "start_col_offset_idx": 4, "end_col_offset_idx": 5, "text": "78", "column_header": false, "row_header": false, "row_section": false}]]}}], "key_value_items": [], "pages": {"1": {"size": {"width": 612.0, "height": 792.0}, "image": null, "page_no": 1}, "2": {"size": {"width": 612.0, "height": 792.0}, "image": null, "page_no": 2}, "3": {"size": {"width": 612.0, "height": 792.0}, "image": null, "page_no": 3}, "4": {"size": {"width": 612.0, "height": 792.0}, "image": null, "page_no": 4}, "5": {"size": {"width": 612.0, "height": 792.0}, "image": null, "page_no": 5}, "6": {"size": {"width": 612.0, "height": 792.0}, "image": null, "page_no": 6}, "7": {"size": {"width": 612.0, "height": 792.0}, "image": null, "page_no": 7}, "8": {"size": {"width": 612.0, "height": 792.0}, "image": null, "page_no": 8}, "9": {"size": {"width": 612.0, "height": 792.0}, "image": null, "page_no": 9}}} \ No newline at end of file diff --git a/test/data/doc/2206.01062.yaml.et b/test/data/doc/2206.01062.yaml.et index 241eb081..dec2edea 100644 --- a/test/data/doc/2206.01062.yaml.et +++ b/test/data/doc/2206.01062.yaml.et @@ -17,78 +17,78 @@ 16: text 17: section_header 18: text - 19: page_header - 20: section_header + 19: section_header + 20: text 21: text 22: text - 23: text + 23: list_item 24: list_item 25: list_item 26: list_item - 27: list_item - 28: footnote - 29: text - 30: list_item + 27: footnote + 28: text + 29: list_item + 30: text 31: text - 32: text - 33: section_header + 32: section_header + 33: text 34: text - 35: text - 36: section_header + 35: section_header + 36: text 37: text - 38: text - 39: page_header - 40: page_header - 41: caption - 42: picture - 43: text + 38: caption + 39: picture + 40: text + 41: text + 42: text + 43: footnote 44: text 45: text - 46: footnote - 47: text + 46: text + 47: section_header 48: text - 49: text - 50: section_header + 49: caption + 50: table 51: text - 52: page_header - 53: caption - 54: table + 52: text + 53: text + 54: text 55: text - 56: text + 56: footnote 57: text 58: text 59: text - 60: footnote - 61: page_header - 62: page_header - 63: text - 64: text - 65: text - 66: list_item - 67: list_item - 68: list_item - 69: list_item - 70: list_item - 71: list_item - 72: text - 73: text - 74: caption - 75: picture - 76: text - 77: text - 78: caption - 79: table - 80: text - 81: section_header + 60: list_item + 61: list_item + 62: list_item + 63: list_item + 64: list_item + 65: list_item + 66: text + 67: text + 68: caption + 69: picture + 70: text + 71: text + 72: caption + 73: table + 74: text + 75: section_header + 76: caption + 77: picture + 78: text + 79: text + 80: section_header + 81: text 82: caption - 83: picture - 84: text + 83: table + 84: section_header 85: text 86: section_header 87: text - 88: page_header - 89: caption - 90: table + 88: caption + 89: table + 90: text 91: section_header 92: text 93: section_header @@ -96,44 +96,35 @@ 95: caption 96: table 97: text - 98: section_header - 99: text - 100: section_header - 101: text - 102: caption - 103: table - 104: text - 105: text - 106: section_header - 107: text - 108: section_header - 109: text - 110: text - 111: section_header + 98: text + 99: section_header + 100: text + 101: section_header + 102: text + 103: text + 104: section_header + 105: list_item + 106: list_item + 107: list_item + 108: list_item + 109: list_item + 110: list_item + 111: list_item 112: list_item 113: list_item 114: list_item 115: list_item 116: list_item 117: list_item - 118: list_item - 119: list_item - 120: list_item + 118: caption + 119: picture + 120: text 121: list_item 122: list_item 123: list_item 124: list_item - 125: page_header - 126: page_header - 127: caption - 128: picture - 129: text - 130: list_item - 131: list_item - 132: list_item - 133: list_item - 134: list_item - 135: list_item - 136: list_item - 137: list_item - 138: list_item \ No newline at end of file + 125: list_item + 126: list_item + 127: list_item + 128: list_item + 129: list_item \ No newline at end of file diff --git a/test/data/legacy_doc/doc-export.docling.yaml.gt b/test/data/legacy_doc/doc-export.docling.yaml.gt index 6dbf5c63..53508a37 100644 --- a/test/data/legacy_doc/doc-export.docling.yaml.gt +++ b/test/data/legacy_doc/doc-export.docling.yaml.gt @@ -6615,4 +6615,4 @@ texts: text: '23. Zhong, X., Tang, J., Yepes, A.J.: Publaynet: largest dataset ever for document layout analysis. In: 2019 International Conference on Document Analysis and Recognition (ICDAR). pp. 1015-1022. IEEE (2019)' -version: 1.0.0 +version: 1.1.0 diff --git a/test/test_docling_doc.py b/test/test_docling_doc.py index 286f73b7..0ababb5d 100644 --- a/test/test_docling_doc.py +++ b/test/test_docling_doc.py @@ -14,6 +14,7 @@ from docling_core.types.doc.document import ( # BoundingBox, CURRENT_VERSION, CodeItem, + ContentLayer, DocItem, DoclingDocument, DocumentOrigin, @@ -627,6 +628,13 @@ def test_image_ref(): assert image.uri.name == "image.png" +def test_upgrade_content_layer_from_1_0_0(): + doc = DoclingDocument.load_from_json("test/data/doc/2206.01062-1.0.0.json") + + assert doc.version == CURRENT_VERSION + assert doc.texts[0].content_layer == ContentLayer.FURNITURE + + def test_version_doc(): # default version From 20839419380f8d80e1c54d398ca4e5ad18a7b676 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Wed, 5 Feb 2025 12:45:21 +0100 Subject: [PATCH 09/10] Update tests Signed-off-by: Christoph Auer --- test/data/chunker/0_out_chunks.json | 515 ---------------------------- test/data/chunker/1_out_chunks.json | 515 ---------------------------- 2 files changed, 1030 deletions(-) diff --git a/test/data/chunker/0_out_chunks.json b/test/data/chunker/0_out_chunks.json index 94cb63fd..fad8a5cc 100644 --- a/test/data/chunker/0_out_chunks.json +++ b/test/data/chunker/0_out_chunks.json @@ -1,44 +1,5 @@ { "root": [ - { - "text": "arXiv:2408.09869v3 [cs.CL] 30 Aug 2024", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/0", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_header", - "prov": [ - { - "page_no": 1, - "bbox": { - "l": 17.088111877441406, - "t": 583.2296752929688, - "r": 36.339778900146484, - "b": 231.99996948242188, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 38 - ] - } - ] - } - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, { "text": "Version 1.0", "meta": { @@ -294,92 +255,6 @@ } } }, - { - "text": "Docling Technical Report", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/10", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 1, - "bbox": { - "l": 107.10411071777344, - "t": 58.48394775390625, - "r": 200.8249969482422, - "b": 49.8505859375, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 24 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "1 Introduction" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, - { - "text": "1", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/11", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 1, - "bbox": { - "l": 303.50897216796875, - "t": 49.50579833984375, - "r": 308.4902648925781, - "b": 39.960147857666016, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 1 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "1 Introduction" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, { "text": "Here is what Docling delivers today:", "meta": { @@ -894,50 +769,6 @@ } } }, - { - "text": "2", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/29", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 2, - "bbox": { - "l": 302.96832275390625, - "t": 49.7403564453125, - "r": 308.49029541015625, - "b": 39.960079193115234, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 1 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "3 Processing pipeline", - "3.1 PDF backends" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, { "text": "Figure 1: Sketch of Docling's default processing pipeline. The inner part of the model pipeline is easily customizable and extensible.", "meta": { @@ -1249,51 +1080,6 @@ } } }, - { - "text": "3", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/40", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 3, - "bbox": { - "l": 302.7810974121094, - "t": 49.40008544921875, - "r": 308.4903259277344, - "b": 39.96010971069336, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 1 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "3 Processing pipeline", - "3.2 AI models", - "Table Structure Recognition" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, { "text": "The Docling pipeline feeds all table objects detected in the layout analysis to the TableFormer model, by providing an image-crop of the table and the included text cells. TableFormer structure predictions are matched back to the PDF cells in post-processing to avoid expensive re-transcription text in the table image. Typical tables require between 2 and 6 seconds to be processed on a standard CPU, strongly depending on the amount of included table cells.", "meta": { @@ -1690,49 +1476,6 @@ } } }, - { - "text": "4", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/54", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 4, - "bbox": { - "l": 302.41058349609375, - "t": 49.65472412109375, - "r": 308.49029541015625, - "b": 39.960079193115234, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 1 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "4 Performance" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, { "text": "torch runtimes backing the Docling pipeline. We will deliver updates on this topic at in a future version of this report.", "meta": { @@ -2062,49 +1805,6 @@ } } }, - { - "text": "5", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/65", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 5, - "bbox": { - "l": 302.7286376953125, - "t": 49.4200439453125, - "r": 308.49029541015625, - "b": 39.96018600463867, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 1 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "References" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, { "text": "machine learning through dynamic python bytecode transformation and graph compilation. In Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2 (ASPLOS '24) . ACM, 4 2024. doi: 10.1145/3620665.3640366. URL https://pytorch.org/assets/pytorch2-2.pdf .", "meta": { @@ -2516,49 +2216,6 @@ } } }, - { - "text": "6", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/81", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 6, - "bbox": { - "l": 302.7389221191406, - "t": 49.36236572265625, - "r": 308.5960998535156, - "b": 39.96012496948242, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 1 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "References" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, { "text": "In this section, we illustrate a few examples of Docling' s output in Markdown and JSON.", "meta": { @@ -3677,92 +3334,6 @@ } } }, - { - "text": "7", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/119", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 7, - "bbox": { - "l": 302.8258056640625, - "t": 49.2652587890625, - "r": 308.49029541015625, - "b": 39.960079193115234, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 1 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "ACM Reference Format:" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, - { - "text": "arXiv:2206.01062v1 [cs.CV] 2 Jun 2022", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/120", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_header", - "prov": [ - { - "page_no": 7, - "bbox": { - "l": 110.2352066040039, - "t": 618.2011108398438, - "r": 118.32157135009766, - "b": 492.749267578125, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 37 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "ACM Reference Format:" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, { "text": "KDD '22, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", "meta": { @@ -4540,49 +4111,6 @@ } } }, - { - "text": "8", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/141", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 8, - "bbox": { - "l": 303.0059509277344, - "t": 48.90887451171875, - "r": 308.49029541015625, - "b": 39.960079193115234, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 1 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "Baselines for Object Detection" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, { "text": "KDD '22, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", "meta": { @@ -4929,49 +4457,6 @@ "filename": "2408.09869v3.pdf" } } - }, - { - "text": "9", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/149", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 9, - "bbox": { - "l": 302.54315185546875, - "t": 49.2738037109375, - "r": 308.49029541015625, - "b": 39.96010971069336, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 1 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "Baselines for Object Detection" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } } ] } diff --git a/test/data/chunker/1_out_chunks.json b/test/data/chunker/1_out_chunks.json index a9e55a20..02407d3d 100644 --- a/test/data/chunker/1_out_chunks.json +++ b/test/data/chunker/1_out_chunks.json @@ -1,44 +1,5 @@ { "root": [ - { - "text": "arXiv:2408.09869v3 [cs.CL] 30 Aug 2024", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/0", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_header", - "prov": [ - { - "page_no": 1, - "bbox": { - "l": 17.088111877441406, - "t": 583.2296752929688, - "r": 36.339778900146484, - "b": 231.99996948242188, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 38 - ] - } - ] - } - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, { "text": "Version 1.0", "meta": { @@ -294,92 +255,6 @@ } } }, - { - "text": "Docling Technical Report", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/10", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 1, - "bbox": { - "l": 107.10411071777344, - "t": 58.48394775390625, - "r": 200.8249969482422, - "b": 49.8505859375, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 24 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "1 Introduction" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, - { - "text": "1", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/11", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 1, - "bbox": { - "l": 303.50897216796875, - "t": 49.50579833984375, - "r": 308.4902648925781, - "b": 39.960147857666016, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 1 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "1 Introduction" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, { "text": "Here is what Docling delivers today:", "meta": { @@ -984,50 +859,6 @@ } } }, - { - "text": "2", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/29", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 2, - "bbox": { - "l": 302.96832275390625, - "t": 49.7403564453125, - "r": 308.49029541015625, - "b": 39.960079193115234, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 1 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "3 Processing pipeline", - "3.1 PDF backends" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, { "text": "Figure 1: Sketch of Docling's default processing pipeline. The inner part of the model pipeline is easily customizable and extensible.", "meta": { @@ -1339,51 +1170,6 @@ } } }, - { - "text": "3", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/40", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 3, - "bbox": { - "l": 302.7810974121094, - "t": 49.40008544921875, - "r": 308.4903259277344, - "b": 39.96010971069336, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 1 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "3 Processing pipeline", - "3.2 AI models", - "Table Structure Recognition" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, { "text": "The Docling pipeline feeds all table objects detected in the layout analysis to the TableFormer model, by providing an image-crop of the table and the included text cells. TableFormer structure predictions are matched back to the PDF cells in post-processing to avoid expensive re-transcription text in the table image. Typical tables require between 2 and 6 seconds to be processed on a standard CPU, strongly depending on the amount of included table cells.", "meta": { @@ -1780,49 +1566,6 @@ } } }, - { - "text": "4", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/54", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 4, - "bbox": { - "l": 302.41058349609375, - "t": 49.65472412109375, - "r": 308.49029541015625, - "b": 39.960079193115234, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 1 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "4 Performance" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, { "text": "torch runtimes backing the Docling pipeline. We will deliver updates on this topic at in a future version of this report.", "meta": { @@ -2170,49 +1913,6 @@ } } }, - { - "text": "5", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/65", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 5, - "bbox": { - "l": 302.7286376953125, - "t": 49.4200439453125, - "r": 308.49029541015625, - "b": 39.96018600463867, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 1 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "References" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, { "text": "machine learning through dynamic python bytecode transformation and graph compilation. In Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2 (ASPLOS '24) . ACM, 4 2024. doi: 10.1145/3620665.3640366. URL https://pytorch.org/assets/pytorch2-2.pdf .", "meta": { @@ -2858,49 +2558,6 @@ } } }, - { - "text": "6", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/81", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 6, - "bbox": { - "l": 302.7389221191406, - "t": 49.36236572265625, - "r": 308.5960998535156, - "b": 39.96012496948242, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 1 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "References" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, { "text": "In this section, we illustrate a few examples of Docling' s output in Markdown and JSON.", "meta": { @@ -4019,92 +3676,6 @@ } } }, - { - "text": "7", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/119", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 7, - "bbox": { - "l": 302.8258056640625, - "t": 49.2652587890625, - "r": 308.49029541015625, - "b": 39.960079193115234, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 1 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "ACM Reference Format:" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, - { - "text": "arXiv:2206.01062v1 [cs.CV] 2 Jun 2022", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/120", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_header", - "prov": [ - { - "page_no": 7, - "bbox": { - "l": 110.2352066040039, - "t": 618.2011108398438, - "r": 118.32157135009766, - "b": 492.749267578125, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 37 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "ACM Reference Format:" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, { "text": "KDD '22, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", "meta": { @@ -4882,49 +4453,6 @@ } } }, - { - "text": "8", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/141", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 8, - "bbox": { - "l": 303.0059509277344, - "t": 48.90887451171875, - "r": 308.49029541015625, - "b": 39.960079193115234, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 1 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "Baselines for Object Detection" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } - }, { "text": "KDD '22, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", "meta": { @@ -5271,49 +4799,6 @@ "filename": "2408.09869v3.pdf" } } - }, - { - "text": "9", - "meta": { - "schema_name": "docling_core.transforms.chunker.DocMeta", - "version": "1.0.0", - "doc_items": [ - { - "self_ref": "#/texts/149", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "page_footer", - "prov": [ - { - "page_no": 9, - "bbox": { - "l": 302.54315185546875, - "t": 49.2738037109375, - "r": 308.49029541015625, - "b": 39.96010971069336, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 1 - ] - } - ] - } - ], - "headings": [ - "Docling Technical Report", - "Baselines for Object Detection" - ], - "origin": { - "mimetype": "application/pdf", - "binary_hash": 14981478401387673002, - "filename": "2408.09869v3.pdf" - } - } } ] } From 46ffd203e0c86202acd0a05d17ee6f4723aa2f57 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Wed, 5 Feb 2025 13:35:02 +0100 Subject: [PATCH 10/10] Address review comments Signed-off-by: Christoph Auer --- docling_core/types/doc/document.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index ee7a47be..2d8f087d 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -63,6 +63,8 @@ DocItemLabel.LIST_ITEM, DocItemLabel.CODE, DocItemLabel.REFERENCE, + DocItemLabel.PAGE_HEADER, + DocItemLabel.PAGE_FOOTER, } @@ -513,6 +515,9 @@ class ContentLayer(str, Enum): FURNITURE = "furniture" +DEFAULT_CONTENT_LAYERS = {ContentLayer.BODY} + + class NodeItem(BaseModel): """NodeItem.""" @@ -1851,7 +1856,7 @@ def iterate_items( with_groups: bool = False, traverse_pictures: bool = False, page_no: Optional[int] = None, - included_content_layers: List[ContentLayer] = [ContentLayer.BODY], + included_content_layers: set[ContentLayer] = DEFAULT_CONTENT_LAYERS, _level: int = 0, # fixed parameter, carries through the node nesting level ) -> typing.Iterable[Tuple[NodeItem, int]]: # tuple of node and level """iterate_elements.