From 926dfd29d51c52628fe9fe8acb0ad0121c88e58a Mon Sep 17 00:00:00 2001 From: "Peter W. J. Staar" <91719829+PeterStaar-IBM@users.noreply.github.com> Date: Tue, 19 Nov 2024 12:21:17 +0100 Subject: [PATCH] feat: added excel backend (#334) * feat: added excel backend Signed-off-by: Peter Staar * first msexcel backend Signed-off-by: Peter Staar * added tooling for the cli Signed-off-by: Peter Staar * first working version for excel parsing of tables Signed-off-by: Peter Staar * added proper typing for mypy Signed-off-by: Peter Staar * added proper typing for mypy Signed-off-by: Peter Staar * refactor EXCEL to XLSX Signed-off-by: Peter Staar * added the unit tests Signed-off-by: Peter Staar * ran poetry lock Signed-off-by: Peter Staar * adding images to output [WIP] Signed-off-by: Peter Staar * reformatted the code Signed-off-by: Peter Staar * fixed the mypy Signed-off-by: Peter Staar * updated the msexcel Signed-off-by: Peter Staar * updated the msexcel (2) Signed-off-by: Peter Staar * fixed the mypy Signed-off-by: Peter Staar * added tests for merged cells in excel Signed-off-by: Peter Staar * reformatted the code Signed-off-by: Peter Staar --------- Signed-off-by: Peter Staar --- docling/backend/msexcel_backend.py | 374 ++ docling/datamodel/base_models.py | 6 + docling/document_converter.py | 9 + poetry.lock | 212 +- pyproject.toml | 2 + .../groundtruth/docling_v2/test-01.xlsx.itxt | 10 + .../groundtruth/docling_v2/test-01.xlsx.json | 3240 +++++++++++++++++ .../groundtruth/docling_v2/test-01.xlsx.md | 51 + tests/data/xlsx/test-01.xlsx | Bin 0 -> 21387 bytes tests/test_backend_msexcel.py | 77 + 10 files changed, 3887 insertions(+), 94 deletions(-) create mode 100644 docling/backend/msexcel_backend.py create mode 100644 tests/data/groundtruth/docling_v2/test-01.xlsx.itxt create mode 100644 tests/data/groundtruth/docling_v2/test-01.xlsx.json create mode 100644 tests/data/groundtruth/docling_v2/test-01.xlsx.md create mode 100644 tests/data/xlsx/test-01.xlsx create mode 100644 tests/test_backend_msexcel.py diff --git a/docling/backend/msexcel_backend.py b/docling/backend/msexcel_backend.py new file mode 100644 index 00000000..508b0e8d --- /dev/null +++ b/docling/backend/msexcel_backend.py @@ -0,0 +1,374 @@ +import logging +from io import BytesIO +from pathlib import Path +from typing import Dict, Set, Tuple, Union + +from docling_core.types.doc import ( + DoclingDocument, + DocumentOrigin, + GroupLabel, + ImageRef, + TableCell, + TableData, +) + +# from lxml import etree +from openpyxl import Workbook, load_workbook +from openpyxl.cell.cell import Cell +from openpyxl.drawing.image import Image +from openpyxl.worksheet.worksheet import Worksheet + +from docling.backend.abstract_backend import DeclarativeDocumentBackend +from docling.datamodel.base_models import InputFormat +from docling.datamodel.document import InputDocument + +_log = logging.getLogger(__name__) + +from typing import Any, List + +from pydantic import BaseModel + + +class ExcelCell(BaseModel): + row: int + col: int + text: str + row_span: int + col_span: int + + +class ExcelTable(BaseModel): + num_rows: int + num_cols: int + data: List[ExcelCell] + + +class MsExcelDocumentBackend(DeclarativeDocumentBackend): + + def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]): + super().__init__(in_doc, path_or_stream) + + # Initialise the parents for the hierarchy + self.max_levels = 10 + + self.parents: Dict[int, Any] = {} + for i in range(-1, self.max_levels): + self.parents[i] = None + + self.workbook = None + try: + if isinstance(self.path_or_stream, BytesIO): + self.workbook = load_workbook(filename=self.path_or_stream) + + elif isinstance(self.path_or_stream, Path): + self.workbook = load_workbook(filename=str(self.path_or_stream)) + + self.valid = True + except Exception as e: + self.valid = False + + raise RuntimeError( + f"MsPowerpointDocumentBackend could not load document with hash {self.document_hash}" + ) from e + + def is_valid(self) -> bool: + _log.info(f"valid: {self.valid}") + return self.valid + + @classmethod + def supports_pagination(cls) -> bool: + return True + + def unload(self): + if isinstance(self.path_or_stream, BytesIO): + self.path_or_stream.close() + + self.path_or_stream = None + + @classmethod + def supported_formats(cls) -> Set[InputFormat]: + return {InputFormat.XLSX} + + def convert(self) -> DoclingDocument: + # Parses the XLSX into a structured document model. + + origin = DocumentOrigin( + filename=self.file.name or "file.xlsx", + mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + binary_hash=self.document_hash, + ) + + doc = DoclingDocument(name=self.file.stem or "file.xlsx", origin=origin) + + if self.is_valid(): + doc = self._convert_workbook(doc) + else: + raise RuntimeError( + f"Cannot convert doc with {self.document_hash} because the backend failed to init." + ) + + return doc + + def _convert_workbook(self, doc: DoclingDocument) -> DoclingDocument: + + if self.workbook is not None: + + # Iterate over all sheets + for sheet_name in self.workbook.sheetnames: + _log.info(f"Processing sheet: {sheet_name}") + + # Access the sheet by name + sheet = self.workbook[sheet_name] + + self.parents[0] = doc.add_group( + parent=None, + label=GroupLabel.SECTION, + name=f"sheet: {sheet_name}", + ) + + doc = self._convert_sheet(doc, sheet) + else: + _log.error("Workbook is not initialized.") + + return doc + + def _convert_sheet(self, doc: DoclingDocument, sheet: Worksheet): + + doc = self._find_tables_in_sheet(doc, sheet) + + doc = self._find_images_in_sheet(doc, sheet) + + return doc + + def _find_tables_in_sheet(self, doc: DoclingDocument, sheet: Worksheet): + + tables = self._find_data_tables(sheet) + + for excel_table in tables: + num_rows = excel_table.num_rows + num_cols = excel_table.num_cols + + table_data = TableData( + num_rows=num_rows, + num_cols=num_cols, + table_cells=[], + ) + + for excel_cell in excel_table.data: + + cell = TableCell( + text=excel_cell.text, + row_span=excel_cell.row_span, + col_span=excel_cell.col_span, + start_row_offset_idx=excel_cell.row, + end_row_offset_idx=excel_cell.row + excel_cell.row_span, + start_col_offset_idx=excel_cell.col, + end_col_offset_idx=excel_cell.col + excel_cell.col_span, + col_header=False, + row_header=False, + ) + table_data.table_cells.append(cell) + + doc.add_table(data=table_data, parent=self.parents[0]) + + return doc + + def _find_data_tables(self, sheet: Worksheet): + """ + Find all compact rectangular data tables in a sheet. + """ + # _log.info("find_data_tables") + + tables = [] # List to store found tables + visited: set[Tuple[int, int]] = set() # Track already visited cells + + # Iterate over all cells in the sheet + for ri, row in enumerate(sheet.iter_rows(values_only=False)): + for rj, cell in enumerate(row): + + # Skip empty or already visited cells + if cell.value is None or (ri, rj) in visited: + continue + + # If the cell starts a new table, find its bounds + table_bounds, visited_cells = self._find_table_bounds( + sheet, ri, rj, visited + ) + + visited.update(visited_cells) # Mark these cells as visited + tables.append(table_bounds) + + return tables + + def _find_table_bounds( + self, + sheet: Worksheet, + start_row: int, + start_col: int, + visited: set[Tuple[int, int]], + ): + """ + Determine the bounds of a compact rectangular table. + Returns: + - A dictionary with the bounds and data. + - A set of visited cell coordinates. + """ + _log.info("find_table_bounds") + + max_row = self._find_table_bottom(sheet, start_row, start_col) + max_col = self._find_table_right(sheet, start_row, start_col) + + # Collect the data within the bounds + data = [] + visited_cells = set() + for ri in range(start_row, max_row + 1): + for rj in range(start_col, max_col + 1): + + cell = sheet.cell(row=ri + 1, column=rj + 1) # 1-based indexing + + # Check if the cell belongs to a merged range + row_span = 1 + col_span = 1 + + # _log.info(sheet.merged_cells.ranges) + for merged_range in sheet.merged_cells.ranges: + + if ( + merged_range.min_row <= ri + 1 + and ri + 1 <= merged_range.max_row + and merged_range.min_col <= rj + 1 + and rj + 1 <= merged_range.max_col + ): + + row_span = merged_range.max_row - merged_range.min_row + 1 + col_span = merged_range.max_col - merged_range.min_col + 1 + break + + if (ri, rj) not in visited_cells: + data.append( + ExcelCell( + row=ri - start_row, + col=rj - start_col, + text=str(cell.value), + row_span=row_span, + col_span=col_span, + ) + ) + # _log.info(f"cell: {ri}, {rj} -> {ri - start_row}, {rj - start_col}, {row_span}, {col_span}: {str(cell.value)}") + + # Mark all cells in the span as visited + for span_row in range(ri, ri + row_span): + for span_col in range(rj, rj + col_span): + visited_cells.add((span_row, span_col)) + + return ( + ExcelTable( + num_rows=max_row + 1 - start_row, + num_cols=max_col + 1 - start_col, + data=data, + ), + visited_cells, + ) + + def _find_table_bottom(self, sheet: Worksheet, start_row: int, start_col: int): + """Function to find the bottom boundary of the table""" + + max_row = start_row + + while max_row < sheet.max_row - 1: + # Get the cell value or check if it is part of a merged cell + cell = sheet.cell(row=max_row + 2, column=start_col + 1) + + # Check if the cell is part of a merged range + merged_range = next( + (mr for mr in sheet.merged_cells.ranges if cell.coordinate in mr), + None, + ) + + if cell.value is None and not merged_range: + break # Stop if the cell is empty and not merged + + # Expand max_row to include the merged range if applicable + if merged_range: + max_row = max(max_row, merged_range.max_row - 1) + else: + max_row += 1 + + return max_row + + def _find_table_right(self, sheet: Worksheet, start_row: int, start_col: int): + """Function to find the right boundary of the table""" + + max_col = start_col + + while max_col < sheet.max_column - 1: + # Get the cell value or check if it is part of a merged cell + cell = sheet.cell(row=start_row + 1, column=max_col + 2) + + # Check if the cell is part of a merged range + merged_range = next( + (mr for mr in sheet.merged_cells.ranges if cell.coordinate in mr), + None, + ) + + if cell.value is None and not merged_range: + break # Stop if the cell is empty and not merged + + # Expand max_col to include the merged range if applicable + if merged_range: + max_col = max(max_col, merged_range.max_col - 1) + else: + max_col += 1 + + return max_col + + def _find_images_in_sheet( + self, doc: DoclingDocument, sheet: Worksheet + ) -> DoclingDocument: + + # FIXME: mypy does not agree with _images ... + """ + # Iterate over images in the sheet + for idx, image in enumerate(sheet._images): # Access embedded images + + image_bytes = BytesIO(image.ref.blob) + pil_image = Image.open(image_bytes) + + doc.add_picture( + parent=self.parents[0], + image=ImageRef.from_pil(image=pil_image, dpi=72), + caption=None, + ) + """ + + # FIXME: mypy does not agree with _charts ... + """ + for idx, chart in enumerate(sheet._charts): # Access embedded charts + chart_path = f"chart_{idx + 1}.png" + _log.info( + f"Chart found, but dynamic rendering is required for: {chart_path}" + ) + + _log.info(f"Chart {idx + 1}:") + + # Chart type + _log.info(f"Type: {type(chart).__name__}") + + # Title + if chart.title: + _log.info(f"Title: {chart.title}") + else: + _log.info("No title") + + # Data series + for series in chart.series: + _log.info(" => series ...") + _log.info(f"Data Series: {series.title}") + _log.info(f"Values: {series.values}") + _log.info(f"Categories: {series.categories}") + + # Position + # _log.info(f"Anchor Cell: {chart.anchor}") + """ + + return doc diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index d06b6097..311d6d01 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -32,6 +32,7 @@ class InputFormat(str, Enum): PDF = "pdf" ASCIIDOC = "asciidoc" MD = "md" + XLSX = "xlsx" class OutputFormat(str, Enum): @@ -49,6 +50,7 @@ class OutputFormat(str, Enum): InputFormat.HTML: ["html", "htm", "xhtml"], InputFormat.IMAGE: ["jpg", "jpeg", "png", "tif", "tiff", "bmp"], InputFormat.ASCIIDOC: ["adoc", "asciidoc", "asc"], + InputFormat.XLSX: ["xlsx"], } FormatToMimeType: Dict[InputFormat, List[str]] = { @@ -72,7 +74,11 @@ class OutputFormat(str, Enum): InputFormat.PDF: ["application/pdf"], InputFormat.ASCIIDOC: ["text/asciidoc"], InputFormat.MD: ["text/markdown", "text/x-markdown"], + InputFormat.XLSX: [ + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + ], } + MimeTypeToFormat = { mime: fmt for fmt, mimes in FormatToMimeType.items() for mime in mimes } diff --git a/docling/document_converter.py b/docling/document_converter.py index f2d29e62..4e436d07 100644 --- a/docling/document_converter.py +++ b/docling/document_converter.py @@ -12,6 +12,7 @@ from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.backend.html_backend import HTMLDocumentBackend from docling.backend.md_backend import MarkdownDocumentBackend +from docling.backend.msexcel_backend import MsExcelDocumentBackend from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend from docling.backend.msword_backend import MsWordDocumentBackend from docling.datamodel.base_models import ConversionStatus, DocumentStream, InputFormat @@ -44,6 +45,11 @@ def set_optional_field_default(self) -> "FormatOption": return self +class ExcelFormatOption(FormatOption): + pipeline_cls: Type = SimplePipeline + backend: Type[AbstractDocumentBackend] = MsExcelDocumentBackend + + class WordFormatOption(FormatOption): pipeline_cls: Type = SimplePipeline backend: Type[AbstractDocumentBackend] = MsWordDocumentBackend @@ -80,6 +86,9 @@ class ImageFormatOption(FormatOption): _format_to_default_options = { + InputFormat.XLSX: FormatOption( + pipeline_cls=SimplePipeline, backend=MsExcelDocumentBackend + ), InputFormat.DOCX: FormatOption( pipeline_cls=SimplePipeline, backend=MsWordDocumentBackend ), diff --git a/poetry.lock b/poetry.lock index c7e040d8..439f9136 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -13,87 +13,87 @@ files = [ [[package]] name = "aiohttp" -version = "3.11.2" +version = "3.11.4" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.9" files = [ - {file = "aiohttp-3.11.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:783741f534c14957fbe657d62a34b947ec06db23d45a2fd4a8aeb73d9c84d7e6"}, - {file = "aiohttp-3.11.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:435f7a08d8aa42371a94e7c141205a9cb092ba551084b5e0c57492e6673601a3"}, - {file = "aiohttp-3.11.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c681f34e2814bc6e1eef49752b338061b94a42c92734d0be9513447d3f83718c"}, - {file = "aiohttp-3.11.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:73a664478ae1ea011b5a710fb100b115ca8b2146864fa0ce4143ff944df714b8"}, - {file = "aiohttp-3.11.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1d06c8fd8b453c3e553c956bd3b8395100401060430572174bb7876dd95ad49"}, - {file = "aiohttp-3.11.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b1f4844909321ef2c1cee50ddeccbd6018cd8c8d1ddddda3f553e94a5859497"}, - {file = "aiohttp-3.11.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdc6f8dce09281ae534eaf08a54f0d38612398375f28dad733a8885f3bf9b978"}, - {file = "aiohttp-3.11.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d2d942421cf3a1d1eceae8fa192f1fbfb74eb9d3e207d35ad2696bd2ce2c987c"}, - {file = "aiohttp-3.11.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:08ebe7a1d6c1e5ca766d68407280d69658f5f98821c2ba6c41c63cabfed159af"}, - {file = "aiohttp-3.11.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:2793d3297f3e49015140e6d3ea26142c967e07998e2fb00b6ee8d041138fbc4e"}, - {file = "aiohttp-3.11.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:4a23475d8d5c56e447b7752a1e2ac267c1f723f765e406c81feddcd16cdc97bc"}, - {file = "aiohttp-3.11.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:556564d89e2f4a6e8fe000894c03e4e84cf0b6cfa5674e425db122633ee244d1"}, - {file = "aiohttp-3.11.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:57993f406ce3f114b2a6756d7809be3ffd0cc40f33e8f8b9a4aa1b027fd4e3eb"}, - {file = "aiohttp-3.11.2-cp310-cp310-win32.whl", hash = "sha256:177b000efaf8d2f7012c649e8aee5b0bf488677b1162be5e7511aa4f9d567607"}, - {file = "aiohttp-3.11.2-cp310-cp310-win_amd64.whl", hash = "sha256:ff5d22eece44528023254b595c670dfcf9733ac6af74c4b6cb4f6a784dc3870c"}, - {file = "aiohttp-3.11.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:50e0aee4adc9abcd2109c618a8d1b2c93b85ac277b24a003ab147d91e068b06d"}, - {file = "aiohttp-3.11.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9aa4e68f1e4f303971ec42976fb170204fb5092de199034b57199a1747e78a2d"}, - {file = "aiohttp-3.11.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d84930b4145991214602372edd7305fc76b700220db79ac0dd57d3afd0f0a1ca"}, - {file = "aiohttp-3.11.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4ec8afd362356b8798c8caa806e91deb3f0602d8ffae8e91d2d3ced2a90c35e"}, - {file = "aiohttp-3.11.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fb0544a0e8294a5a5e20d3cacdaaa9a911d7c0a9150f5264aef36e7d8fdfa07e"}, - {file = "aiohttp-3.11.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7b0a1618060e3f5aa73d3526ca2108a16a1b6bf86612cd0bb2ddcbef9879d06"}, - {file = "aiohttp-3.11.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d878a0186023ac391861958035174d0486f3259cabf8fd94e591985468da3ea"}, - {file = "aiohttp-3.11.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e33a7eddcd07545ccf5c3ab230f60314a17dc33e285475e8405e26e21f02660"}, - {file = "aiohttp-3.11.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4d7fad8c456d180a6d2f44c41cfab4b80e2e81451815825097db48b8293f59d5"}, - {file = "aiohttp-3.11.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8d954ba0eae7f33884d27dc00629ca4389d249eb8d26ca07c30911257cae8c96"}, - {file = "aiohttp-3.11.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:afa55e863224e664a782effa62245df73fdfc55aee539bed6efacf35f6d4e4b7"}, - {file = "aiohttp-3.11.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:10a5f91c319d9d4afba812f72984816b5fcd20742232ff7ecc1610ffbf3fc64d"}, - {file = "aiohttp-3.11.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6e8e19a80ba194db5c06915a9df23c0c06e0e9ca9a4db9386a6056cca555a027"}, - {file = "aiohttp-3.11.2-cp311-cp311-win32.whl", hash = "sha256:9c8d1db4f65bbc9d75b7b271d68fb996f1c8c81a525263862477d93611856c2d"}, - {file = "aiohttp-3.11.2-cp311-cp311-win_amd64.whl", hash = "sha256:2adb967454e10e69478ba4a8d8afbba48a7c7a8619216b7c807f8481cc66ddfb"}, - {file = "aiohttp-3.11.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f833a80d9de9307d736b6af58c235b17ef7f90ebea7b9c49cd274dec7a66a2f1"}, - {file = "aiohttp-3.11.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:382f853516664d2ebfc75dc01da4a10fdef5edcb335fe7b45cf471ce758ecb18"}, - {file = "aiohttp-3.11.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d3a2bcf6c81639a165da93469e1e0aff67c956721f3fa9c0560f07dd1e505116"}, - {file = "aiohttp-3.11.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de3b4d5fb5d69749104b880a157f38baeea7765c93d9cd3837cedd5b84729e10"}, - {file = "aiohttp-3.11.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0a90a0dc4b054b5af299a900bf950fe8f9e3e54322bc405005f30aa5cacc5c98"}, - {file = "aiohttp-3.11.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32334f35824811dd20a12cc90825d000e6b50faaeaa71408d42269151a66140d"}, - {file = "aiohttp-3.11.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cba0b8d25aa2d450762f3dd6df85498f5e7c3ad0ddeb516ef2b03510f0eea32"}, - {file = "aiohttp-3.11.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bbb2dbc2701ab7e9307ca3a8fa4999c5b28246968e0a0202a5afabf48a42e22"}, - {file = "aiohttp-3.11.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:97fba98fc5d9ccd3d33909e898d00f2494d6a9eec7cbda3d030632e2c8bb4d00"}, - {file = "aiohttp-3.11.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0ebdf5087e2ce903d8220cc45dcece90c2199ae4395fd83ca616fcc81010db2c"}, - {file = "aiohttp-3.11.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:122768e3ae9ce74f981b46edefea9c6e5a40aea38aba3ac50168e6370459bf20"}, - {file = "aiohttp-3.11.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5587da333b7d280a312715b843d43e734652aa382cba824a84a67c81f75b338b"}, - {file = "aiohttp-3.11.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:85de9904bc360fd29a98885d2bfcbd4e02ab33c53353cb70607f2bea2cb92468"}, - {file = "aiohttp-3.11.2-cp312-cp312-win32.whl", hash = "sha256:b470de64d17156c37e91effc109d3b032b39867000e2c126732fe01d034441f9"}, - {file = "aiohttp-3.11.2-cp312-cp312-win_amd64.whl", hash = "sha256:3f617a48b70f4843d54f52440ea1e58da6bdab07b391a3a6aed8d3b311a4cc04"}, - {file = "aiohttp-3.11.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5d90b5a3b0f32a5fecf5dd83d828713986c019585f5cddf40d288ff77f366615"}, - {file = "aiohttp-3.11.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d23854e5867650d40cba54d49956aad8081452aa80b2cf0d8c310633f4f48510"}, - {file = "aiohttp-3.11.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:486273d3b5af75a80c31c311988931bdd2a4b96a74d5c7f422bad948f99988ef"}, - {file = "aiohttp-3.11.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9075313f8e41b481e4cb10af405054564b0247dc335db5398ed05f8ec38787e2"}, - {file = "aiohttp-3.11.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:44b69c69c194ffacbc50165911cf023a4b1b06422d1e1199d3aea82eac17004e"}, - {file = "aiohttp-3.11.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b339d91ac9060bd6ecdc595a82dc151045e5d74f566e0864ef3f2ba0887fec42"}, - {file = "aiohttp-3.11.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64e8f5178958a9954043bc8cd10a5ae97352c3f2fc99aa01f2aebb0026010910"}, - {file = "aiohttp-3.11.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3129151378f858cdc4a0a4df355c9a0d060ab49e2eea7e62e9f085bac100551b"}, - {file = "aiohttp-3.11.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:14eb6c628432720e41b4fab1ada879d56cfe7034159849e083eb536b4c2afa99"}, - {file = "aiohttp-3.11.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e57a10aacedcf24666f4c90d03e599f71d172d1c5e00dcf48205c445806745b0"}, - {file = "aiohttp-3.11.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:66e58a2e8c7609a3545c4b38fb8b01a6b8346c4862e529534f7674c5265a97b8"}, - {file = "aiohttp-3.11.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:9b6d15adc9768ff167614ca853f7eeb6ee5f1d55d5660e3af85ce6744fed2b82"}, - {file = "aiohttp-3.11.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2914061f5ca573f990ec14191e6998752fa8fe50d518e3405410353c3f44aa5d"}, - {file = "aiohttp-3.11.2-cp313-cp313-win32.whl", hash = "sha256:1c2496182e577042e0e07a328d91c949da9e77a2047c7291071e734cd7a6e780"}, - {file = "aiohttp-3.11.2-cp313-cp313-win_amd64.whl", hash = "sha256:cccb2937bece1310c5c0163d0406aba170a2e5fb1f0444d7b0e7fdc9bd6bb713"}, - {file = "aiohttp-3.11.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:994cb893936dd2e1803655ae8667a45066bfd53360b148e22b4e3325cc5ea7a3"}, - {file = "aiohttp-3.11.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3666c750b73ce463a413692e3a57c60f7089e2d9116a2aa5a0f0eaf2ae325148"}, - {file = "aiohttp-3.11.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6ad9a7d2a3a0f235184426425f80bd3b26c66b24fd5fddecde66be30c01ebe6e"}, - {file = "aiohttp-3.11.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c979fc92aba66730b66099cd5becb42d869a26c0011119bc1c2478408a8bf7a"}, - {file = "aiohttp-3.11.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:766d0ebf8703d28f854f945982aa09224d5a27a29594c70d921c43c3930fe7ac"}, - {file = "aiohttp-3.11.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:79efd1ee3827b2f16797e14b1e45021206c3271249b4d0025014466d416d7413"}, - {file = "aiohttp-3.11.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d6e069b882c1fdcbe5577dc4be372eda705180197140577a4cddb648c29d22e"}, - {file = "aiohttp-3.11.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5e9a766c346b2ed7e88937919d84ed64b4ef489dad1d8939f806ee52901dc142"}, - {file = "aiohttp-3.11.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2b02a68b9445c70d7f5c8b578c5f5e5866b1d67ca23eb9e8bc8658ae9e3e2c74"}, - {file = "aiohttp-3.11.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:374baefcb1b6275f350da605951f5f02487a9bc84a574a7d5b696439fabd49a3"}, - {file = "aiohttp-3.11.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:d2f991c18132f3e505c108147925372ffe4549173b7c258cf227df1c5977a635"}, - {file = "aiohttp-3.11.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:34f37c59b12bc3afc52bab6fcd9cd3be82ff01c4598a84cbea934ccb3a9c54a0"}, - {file = "aiohttp-3.11.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:33af11eca7bb0f5c6ffaf5e7d9d2336c2448f9c6279b93abdd6f3c35f9ee321f"}, - {file = "aiohttp-3.11.2-cp39-cp39-win32.whl", hash = "sha256:83a70e22e0f6222effe7f29fdeba6c6023f9595e59a0479edacfbd7de4b77bb7"}, - {file = "aiohttp-3.11.2-cp39-cp39-win_amd64.whl", hash = "sha256:c28c1677ea33ccb8b14330560094cc44d3ff4fad617a544fd18beb90403fe0f1"}, - {file = "aiohttp-3.11.2.tar.gz", hash = "sha256:68d1f46f9387db3785508f5225d3acbc5825ca13d9c29f2b5cce203d5863eb79"}, + {file = "aiohttp-3.11.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a60f8206818e3582c999c999c799ab068e14f1870ade47d1fe8536dbfd88010b"}, + {file = "aiohttp-3.11.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e5786e5926f888ce3a996d38d9c9b8f9306f399edb1f1ca3ce7760dab9b1043c"}, + {file = "aiohttp-3.11.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:262e45dbd7f1829bcb024259f65b2cf69d1ef5b37626af6955a1c487613aeb3a"}, + {file = "aiohttp-3.11.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:696adff3594bd449e0fe287441062bdc6f5300928426275b39ed27884ba083a7"}, + {file = "aiohttp-3.11.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6dd1411ecfc070af4df129e81fe42c799d95d81c29c22d2c3e4341d974c38f1a"}, + {file = "aiohttp-3.11.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:06defa9017ab50d215446ebbee294e07eb2fcee72d9a909a08192cfacbd43a08"}, + {file = "aiohttp-3.11.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4bc936d10b8fa3f2aa66e59e034085208b588442263400ddb042703d0db99421"}, + {file = "aiohttp-3.11.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:769457243dc4bc902d376cd14c5c7ec234a4faadb4f283dc2738f004cce9a9e1"}, + {file = "aiohttp-3.11.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7a360c18b2cb391fec9585ba1efc55150e2fbc6100308113117dfea521e810d8"}, + {file = "aiohttp-3.11.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3e9fd9c11299d6d230eb2669fd1ed0238d33970e36b495b0432ace7f157fc931"}, + {file = "aiohttp-3.11.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:0ccbe8ece8a7796ef41b86a3240034c5918d9b324c2ae48fa0be33565e297c64"}, + {file = "aiohttp-3.11.4-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:9a8b6b3c788a8a6f88f5ce23d729cfde7a2ccebbeb09db0822ef266de0445a27"}, + {file = "aiohttp-3.11.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cbe3e356523d0b336543996f92a0e65f760be82447db21c95c60392c8075ff5c"}, + {file = "aiohttp-3.11.4-cp310-cp310-win32.whl", hash = "sha256:a54424050d1eb36edfef913b1bc8552d52a37864c0ea7df3e1e764663e11053a"}, + {file = "aiohttp-3.11.4-cp310-cp310-win_amd64.whl", hash = "sha256:a51f983d91edae7777b5a2af8e5d83224ba01284502c6874a17647ad6cbf0211"}, + {file = "aiohttp-3.11.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:89261fee04715083ef3b5a0d222b094021793c1728b8ff21da361c79f6384095"}, + {file = "aiohttp-3.11.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4ef6eb1367046fb22085f10c5c84ea2efd0d836ad8088306d652ab1d743faf9e"}, + {file = "aiohttp-3.11.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d68bb99bc6a4b0a3eceb95a246f5a0262e600e094b5178c2b1ab0f4bcbae6729"}, + {file = "aiohttp-3.11.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a550b4ff70d06c15057d75ddad89a3e7c496e0609d28c567c20b61cd1265c0a6"}, + {file = "aiohttp-3.11.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9b41e0fb3b415beccd6d0c6e5f3ee34b7952cd76120a1db3e45507b83dc5ef81"}, + {file = "aiohttp-3.11.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8feffa8396724116be5bc05bf4fcba0c738cbe908c82a95f71371e32b28cd2ca"}, + {file = "aiohttp-3.11.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1dd5b7947e23a08c70d4c1924809b91211f14136ffd13d303dc487913cfebfeb"}, + {file = "aiohttp-3.11.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ab5c6a521b156edef13a57a6d524903c547573ff8101e3d1bbe9ee1b97267973"}, + {file = "aiohttp-3.11.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:010bc9606f798eda8ef071759c7b163893071502bcaedc7d5dc49f9d8f12e553"}, + {file = "aiohttp-3.11.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e7d182164aebad4e2faf2742ee7486d4af73d933461adbd8f183ac9b1837323c"}, + {file = "aiohttp-3.11.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:88e681c0d17bb285d2ccbb73ae77ef86339b632ee7967044c2284411120b9730"}, + {file = "aiohttp-3.11.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0d2cea21ec05b832e9f6a2390b23d32ce5575f6cf4812bd171d4493f59c101fe"}, + {file = "aiohttp-3.11.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:635397b5b4de2397f8136f8fd15c8ebee560e36473195c7aa992ffb8e46acdd3"}, + {file = "aiohttp-3.11.4-cp311-cp311-win32.whl", hash = "sha256:cb2d5a24586b508f658ddd710f7d4b7e4f5656cb5d569aeb1f432c1c3704347a"}, + {file = "aiohttp-3.11.4-cp311-cp311-win_amd64.whl", hash = "sha256:ee081375d10fa2f3f7b0d050c8b9c1ae23190e1d9be256035bf8a41059c4df3a"}, + {file = "aiohttp-3.11.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:5cd60673be31449c63f59886f3581478bbdfaddd87e7394a4d73ad134d9be9b9"}, + {file = "aiohttp-3.11.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4ff6105856ae688b29d5daaede1256f5e02e9d5cb3059f8f5ef55d975c2e6992"}, + {file = "aiohttp-3.11.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b169507c98b924fd68b82ae366c285daf6d22456835294c329c3226d61e1f69d"}, + {file = "aiohttp-3.11.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec84106c8b7ff347be06bf579c298a23b6d1d2225c57273a8cd502f257125d4"}, + {file = "aiohttp-3.11.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:03d53b0888f984f4f0c216a37577ee7e7b1ed1dac89cdd2fde61bf2ccb32009b"}, + {file = "aiohttp-3.11.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:822dedad93947fcb1096cc41ee8fd32e9f652777561a37c740e5335699f01cea"}, + {file = "aiohttp-3.11.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aef239c307f3a3f830933d612c0aef4ad4b3aa9ce5233a0954262a00f5c379f1"}, + {file = "aiohttp-3.11.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49eb5a0338f141ef32299d48f1415486f47953d37b0c7fa6d778b73b66f3a7e2"}, + {file = "aiohttp-3.11.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7be4efe60e9bddf78ee165a296e80170147282081e1366f0580cf4cc0fb1182f"}, + {file = "aiohttp-3.11.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:66e83a9a1131f0060aaedcc57f1a7e489898b6c3607eededccc7a9f80b95bdb4"}, + {file = "aiohttp-3.11.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a7986fb988314fd2225c1ecab45fd457e1f2c097dcc3c0aacd2a7aec7486beb6"}, + {file = "aiohttp-3.11.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a34c30e1461da3a69c5bdcfce44418b6f969e1e68ebf367edfa5eaab380abf7a"}, + {file = "aiohttp-3.11.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cb4c676ab99ca2dd231928d481e19cd540155dff36e70e613179c4927bd520b8"}, + {file = "aiohttp-3.11.4-cp312-cp312-win32.whl", hash = "sha256:d40d9a740053cb7fef72442fa7bd699060ff4c710971ebdb8dd7c8b36417570f"}, + {file = "aiohttp-3.11.4-cp312-cp312-win_amd64.whl", hash = "sha256:365df6cf2ad144479ba0e0b58abdc5276923676d34da4c1c45613a80d2aac130"}, + {file = "aiohttp-3.11.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f307632f3eaa676f8c2f5df11e4c00ad47dfa79b06cb2fa39156a4e9c6821bdb"}, + {file = "aiohttp-3.11.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:cc2d64b1747efa183ced57b6bce53c9ea8e16e53419e389051b2a214ad0ed051"}, + {file = "aiohttp-3.11.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f37ece590451ecffc815f2eb41f07191d1a31a0404361d1ae2ed532e05c86da4"}, + {file = "aiohttp-3.11.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b10b316413c80a4dcc5228c092a8d019e4b75d4efbca8988cb5b67ae9fa56881"}, + {file = "aiohttp-3.11.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:beaed1b2d03033dd301a7b67430f03c8255d6856a269c20995a0292de596519e"}, + {file = "aiohttp-3.11.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:518578d6821c942362daa14a56f26b739abeede6e408b0b83e27dfcde17730f7"}, + {file = "aiohttp-3.11.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1e09bc44a1abbd96f55d15330d6cab80459cb8b06a0b656efd712ce47a3710d"}, + {file = "aiohttp-3.11.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ae8480148d696dae49126e97568333fc01493069ad46a94b82f69c7a33197ea"}, + {file = "aiohttp-3.11.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b71aab89800fa2eaeb28923ee05e7e56c28dab4ebdba524db06e963431bf6192"}, + {file = "aiohttp-3.11.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:821c9c640d3dc456c6a7b005e38bc5af05326b6a08ce91a068719934d108a1bb"}, + {file = "aiohttp-3.11.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d60255f3ed71aa14a2e75383543ca31bd362fdc7f0d2eafc060d85a9051598df"}, + {file = "aiohttp-3.11.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:9788781f57fb732426ae74b9955b899e677ce42b848e60a11be29358fb20c976"}, + {file = "aiohttp-3.11.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:94acecf2eee13a45f627ed25a28f5a7f2db66b90ff94cd7a1e9cc1ad32cddd43"}, + {file = "aiohttp-3.11.4-cp313-cp313-win32.whl", hash = "sha256:d0fd6510c6d67d08ec80d9ba10cd340a8cfb0dd33436c858ed38d4564abb27c7"}, + {file = "aiohttp-3.11.4-cp313-cp313-win_amd64.whl", hash = "sha256:474f7266a61d1c3218ef4ec0325747884b2d5a13fab5bff5dd3b55d9c849406a"}, + {file = "aiohttp-3.11.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cfe8646a24856624c1eb7649da99333f0d7e75d9cf7c155ea870957d24b7c63c"}, + {file = "aiohttp-3.11.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e69d9869df50dd591228c62fbb3923d6124517d6bfc47a804492813888b497be"}, + {file = "aiohttp-3.11.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb4f1fe110332651c00d2df160978cf1be70896ed9e612ff7c7e67955091b2c4"}, + {file = "aiohttp-3.11.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d97668595bf03299148ea968fed2195cc76ad063aeec8161731aa6a5dbc2f675"}, + {file = "aiohttp-3.11.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4c0b3378dc294ad6ec6c038ed57164165e0b83ef5f61eee72f6eefccd7df34b8"}, + {file = "aiohttp-3.11.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e0898a77298dc24eef368511d98e551e0b2db293fa9b40c982f4d5ab4d8d2a3a"}, + {file = "aiohttp-3.11.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ecdf43ddde709c336a655c8b3858c56af8f7402de2572001a5a99f7bebf2f78"}, + {file = "aiohttp-3.11.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12bf9c139dfa004b65d2d71906abc593dcafe78a508f33d56c1ca9d87b18337f"}, + {file = "aiohttp-3.11.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2d978a95e4b58ef1fd937fbe347ab397c79ba24e17912595b54faafb88b9b937"}, + {file = "aiohttp-3.11.4-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1e32517c01905e0f4e665c3f3a495868ad996a32c243fcd917587d740253d589"}, + {file = "aiohttp-3.11.4-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:4275160583df18158e0d6789797ad314a14ae611b98933fbe7d7a1c3dcc6bad4"}, + {file = "aiohttp-3.11.4-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:1ff7afc3c461bd9217e2b8a90ddbe5edd94687d5a331c4ae6166dca5876d1a4b"}, + {file = "aiohttp-3.11.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:83bd5aa621b732a0ca1aa3490abd2b010247c4677371a804431935aeedf26e74"}, + {file = "aiohttp-3.11.4-cp39-cp39-win32.whl", hash = "sha256:542a4610571b0affc6e13dda9357235f5f1f2ad9859acc69b188eb53901292d6"}, + {file = "aiohttp-3.11.4-cp39-cp39-win_amd64.whl", hash = "sha256:a468b1b9d5499cbfd0411f5d28adbe651c90508540fdaefb4b7a2171a837a88d"}, + {file = "aiohttp-3.11.4.tar.gz", hash = "sha256:9d95cce8bb010597b3f2217155befe4708e0538d3548aa08d640ebf54e3f57cb"}, ] [package.dependencies] @@ -1031,6 +1031,17 @@ django = ["dj-database-url", "dj-email-url", "django-cache-url"] lint = ["flake8 (==4.0.1)", "flake8-bugbear (==21.9.2)", "mypy (==0.910)", "pre-commit (>=2.4,<3.0)"] tests = ["dj-database-url", "dj-email-url", "django-cache-url", "pytest"] +[[package]] +name = "et-xmlfile" +version = "2.0.0" +description = "An implementation of lxml.xmlfile for the standard library" +optional = false +python-versions = ">=3.8" +files = [ + {file = "et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa"}, + {file = "et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54"}, +] + [[package]] name = "exceptiongroup" version = "1.2.2" @@ -3561,6 +3572,20 @@ numpy = [ {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, ] +[[package]] +name = "openpyxl" +version = "3.1.5" +description = "A Python library to read/write Excel 2010 xlsx/xlsm files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"}, + {file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"}, +] + +[package.dependencies] +et-xmlfile = "*" + [[package]] name = "orjson" version = "3.10.11" @@ -3663,43 +3688,31 @@ python-versions = ">=3.9" files = [ {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"}, {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"}, - {file = "pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed"}, {file = "pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57"}, - {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42"}, {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f"}, {file = "pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645"}, {file = "pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039"}, {file = "pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd"}, - {file = "pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698"}, {file = "pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc"}, - {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3"}, {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32"}, {file = "pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5"}, {file = "pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9"}, {file = "pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4"}, - {file = "pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3"}, {file = "pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319"}, - {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8"}, {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a"}, {file = "pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13"}, {file = "pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015"}, {file = "pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28"}, - {file = "pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0"}, {file = "pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24"}, - {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659"}, {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb"}, {file = "pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d"}, {file = "pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468"}, {file = "pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18"}, - {file = "pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2"}, {file = "pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4"}, - {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d"}, {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a"}, {file = "pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39"}, {file = "pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30"}, - {file = "pandas-2.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c"}, {file = "pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c"}, - {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea"}, {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761"}, {file = "pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e"}, {file = "pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667"}, @@ -6457,13 +6470,13 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0, [[package]] name = "transformers" -version = "4.46.2" +version = "4.46.3" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" optional = false python-versions = ">=3.8.0" files = [ - {file = "transformers-4.46.2-py3-none-any.whl", hash = "sha256:c921f4406b78e6518c97b618c5acd1cf8a4f2315b6b727f4bf9e01496eef849c"}, - {file = "transformers-4.46.2.tar.gz", hash = "sha256:3d85410881e1c074be767877bf33c83231ec11529f274a6044ecb20c157ba14e"}, + {file = "transformers-4.46.3-py3-none-any.whl", hash = "sha256:a12ef6f52841fd190a3e5602145b542d03507222f2c64ebb7ee92e8788093aef"}, + {file = "transformers-4.46.3.tar.gz", hash = "sha256:8ee4b3ae943fe33e82afff8e837f4b052058b07ca9be3cb5b729ed31295f72cc"}, ] [package.dependencies] @@ -6609,6 +6622,17 @@ rich = ">=10.11.0" shellingham = ">=1.3.0" typing-extensions = ">=3.7.4.3" +[[package]] +name = "types-openpyxl" +version = "3.1.5.20241114" +description = "Typing stubs for openpyxl" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-openpyxl-3.1.5.20241114.tar.gz", hash = "sha256:caeb9aafed8a5ffabdc74f880b148d90375364a1cfe7915d5065c5d79f3fe6a2"}, + {file = "types_openpyxl-3.1.5.20241114-py3-none-any.whl", hash = "sha256:f2925f595b08f5aef1baa725c9ee40baaf51beb05d98ac150593d3bdd37b1029"}, +] + [[package]] name = "types-pytz" version = "2024.2.0.20241003" @@ -7212,4 +7236,4 @@ tesserocr = ["tesserocr"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "c7a2f4e30564c5bcd7ed96f203028f781a05ff2103698091616c8aff34ab3493" +content-hash = "1efd17010c7b811afb0c3a09b53d489cc6ac443b4edb6d6e5399b5d6b50d574d" diff --git a/pyproject.toml b/pyproject.toml index 088320e9..6a01764f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ python-pptx = "^1.0.2" beautifulsoup4 = "^4.12.3" pandas = "^2.1.4" marko = "^2.1.2" +openpyxl = "^3.1.5" [tool.poetry.group.dev.dependencies] black = {extras = ["jupyter"], version = "^24.4.2"} @@ -65,6 +66,7 @@ pandas-stubs = "^2.1.4.231227" ipykernel = "^6.29.5" ipywidgets = "^8.1.5" nbqa = "^1.9.0" +types-openpyxl = "^3.1.5.20241114" [tool.poetry.group.docs.dependencies] mkdocs-material = "^9.5.40" diff --git a/tests/data/groundtruth/docling_v2/test-01.xlsx.itxt b/tests/data/groundtruth/docling_v2/test-01.xlsx.itxt new file mode 100644 index 00000000..cab5f63b --- /dev/null +++ b/tests/data/groundtruth/docling_v2/test-01.xlsx.itxt @@ -0,0 +1,10 @@ +item-0 at level 0: unspecified: group _root_ + item-1 at level 1: section: group sheet: Sheet1 + item-2 at level 2: table with [7x3] + item-3 at level 1: section: group sheet: Sheet2 + item-4 at level 2: table with [9x4] + item-5 at level 2: table with [5x3] + item-6 at level 2: table with [5x3] + item-7 at level 1: section: group sheet: Sheet3 + item-8 at level 2: table with [7x3] + item-9 at level 2: table with [7x3] \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/test-01.xlsx.json b/tests/data/groundtruth/docling_v2/test-01.xlsx.json new file mode 100644 index 00000000..9a9e0d52 --- /dev/null +++ b/tests/data/groundtruth/docling_v2/test-01.xlsx.json @@ -0,0 +1,3240 @@ +{ + "schema_name": "DoclingDocument", + "version": "1.0.0", + "name": "test-01", + "origin": { + "mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "binary_hash": 9744611217659152490, + "filename": "test-01.xlsx" + }, + "furniture": { + "self_ref": "#/furniture", + "children": [], + "name": "_root_", + "label": "unspecified" + }, + "body": { + "self_ref": "#/body", + "children": [ + { + "$ref": "#/groups/0" + }, + { + "$ref": "#/groups/1" + }, + { + "$ref": "#/groups/2" + } + ], + "name": "_root_", + "label": "unspecified" + }, + "groups": [ + { + "self_ref": "#/groups/0", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/tables/0" + } + ], + "name": "sheet: Sheet1", + "label": "section" + }, + { + "self_ref": "#/groups/1", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/tables/1" + }, + { + "$ref": "#/tables/2" + }, + { + "$ref": "#/tables/3" + } + ], + "name": "sheet: Sheet2", + "label": "section" + }, + { + "self_ref": "#/groups/2", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/tables/4" + }, + { + "$ref": "#/tables/5" + } + ], + "name": "sheet: Sheet3", + "label": "section" + } + ], + "texts": [], + "pictures": [], + "tables": [ + { + "self_ref": "#/tables/0", + "parent": { + "$ref": "#/groups/0" + }, + "children": [], + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "second ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "third", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "-3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "-6", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 7, + "num_cols": 3, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "second ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "third", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "0", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "-3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "-6", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + }, + { + "self_ref": "#/tables/1", + "parent": { + "$ref": "#/groups/1" + }, + "children": [], + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "col-1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "col-2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "col-3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "col-4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "16", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "15", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "20", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "18", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "24", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "14", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "21", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "28", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "16", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "24", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "32", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 9, + "num_cols": 4, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "col-1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "col-2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "col-3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "col-4", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "16", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "15", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "20", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "18", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "24", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "14", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "21", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "28", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "16", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "24", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "32", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + }, + { + "self_ref": "#/tables/2", + "parent": { + "$ref": "#/groups/1" + }, + "children": [], + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "col-1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "col-2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "col-3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 5, + "num_cols": 3, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "col-1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "col-2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "col-3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + }, + { + "self_ref": "#/tables/3", + "parent": { + "$ref": "#/groups/1" + }, + "children": [], + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "col-1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "col-2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "col-3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 5, + "num_cols": 3, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "col-1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "col-2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "col-3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + }, + { + "self_ref": "#/tables/4", + "parent": { + "$ref": "#/groups/2" + }, + "children": [], + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "header", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "second ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "third", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 7, + "num_cols": 3, + "grid": [ + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "header", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "header", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "second ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "third", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + }, + { + "self_ref": "#/tables/5", + "parent": { + "$ref": "#/groups/2" + }, + "children": [], + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first (f)", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "header (f)", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "second ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "third", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 7, + "num_cols": 3, + "grid": [ + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first (f)", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "header (f)", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "header (f)", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first (f)", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "second ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "third", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + } + ], + "key_value_items": [], + "pages": {} +} \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/test-01.xlsx.md b/tests/data/groundtruth/docling_v2/test-01.xlsx.md new file mode 100644 index 00000000..4a059c60 --- /dev/null +++ b/tests/data/groundtruth/docling_v2/test-01.xlsx.md @@ -0,0 +1,51 @@ +| first | second | third | +|----------|-----------|---------| +| 1 | 5 | 9 | +| 2 | 4 | 6 | +| 3 | 3 | 3 | +| 4 | 2 | 0 | +| 5 | 1 | -3 | +| 6 | 0 | -6 | + +| col-1 | col-2 | col-3 | col-4 | +|---------|---------|---------|---------| +| 1 | 2 | 3 | 4 | +| 2 | 4 | 6 | 8 | +| 3 | 6 | 9 | 12 | +| 4 | 8 | 12 | 16 | +| 5 | 10 | 15 | 20 | +| 6 | 12 | 18 | 24 | +| 7 | 14 | 21 | 28 | +| 8 | 16 | 24 | 32 | + +| col-1 | col-2 | col-3 | +|---------|---------|---------| +| 1 | 2 | 3 | +| 2 | 4 | 6 | +| 3 | 6 | 9 | +| 4 | 8 | 12 | + +| col-1 | col-2 | col-3 | +|---------|---------|---------| +| 1 | 2 | 3 | +| 2 | 4 | 6 | +| 3 | 6 | 9 | +| 4 | 8 | 12 | + +| first | header | header | +|----------|----------|----------| +| first | second | third | +| 1 | 2 | 3 | +| 3 | 4 | 5 | +| 3 | 6 | 7 | +| 8 | 9 | 9 | +| 10 | 9 | 9 | + +| first (f) | header (f) | header (f) | +|-------------|--------------|--------------| +| first (f) | second | third | +| 1 | 2 | 3 | +| 3 | 4 | 5 | +| 3 | 6 | 7 | +| 8 | 9 | 9 | +| 10 | 9 | 9 | \ No newline at end of file diff --git a/tests/data/xlsx/test-01.xlsx b/tests/data/xlsx/test-01.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..ab75b72d56c5315d33acd8aee12113609c1120cd GIT binary patch literal 21387 zcmeEu^Oq&ax0Q+qP}1%QkL(ea_6BJLkLW{sA-jL+0K) zbM3VvGM*>miHtYpq<}$C0Kfns0000806b*0BO3q#0E)l9A_G7GX$sj`I~rL#>L|I{ z8aZgux>{M{=Yjx{=KuhGUH`w^|6mV{Cuzy{(xU`lO25HN*-WVP_ha;3^~Ok%Ao}oI z!CU^aNYBc>y5yk>zzCqn2-_@wJ$B1(Dw^QFGN(o0g$v%#-9Br}EP{t*+@> z(Y+4|rkDIzg|r@X&xU<;t*@l6F%HHHV@)TLAMSf17lDaB zRQuMcmV=oz0P?JyMfJ`$AgO+(&`opB5)~OPbM3x2OO5ftclIJF3LuUHJkj@@d{~0K zVRrC{*&WaoT~DfCA2(b%VIIHcpA-Zbk$AZy!Y!|Y4zm}hHY+lX+C&k}j>1k^dI z$bm6A;RW>o5hybU7|Mjr7CT+8_6+y!nz?J@+`Z(V`0?I-cK1$VJV7`=yZ0JilY~=w zzWM5v0Lps7uytarPd^YA93XE%Uut~*Y75Y>lJjTrFbh9oeQfC=|F zpEW;QEu$8ewbm)b*wG}LG~By_2n}|>;N!#&T^!`Cv=QAb2XC;%h|#Ye#J!9RYl;oc zxU;uom1~;dWO5<_;YlttPgzNu3guU>x4iLhJfarUSmJ&%1?zkT>gOjgfZYFu_6B8o zqPwp%-@m{N{e^ZNdm~E+I@-VX|BLef!O;E})63(fW&7x1f-lA2LWUm~HewM3rCbEW z+X$4reI?f68=~@vzio7pVIwGE1ptfrba=mwu5EBfoeUE^>@t@}Afa#(HM&#;C%@P^ zfm4#&CyCmX@AM-&FWfIYq>4$pkvn(9P?j|p|B@QnA`+du7Op{~}N^FQE=iW!7p80w#)2*Kkvaf9T zYQ#SRuXyt-@%=kU6gKH;3xELtq#ywR;J-ZMYDwo}V{f5vV`K4GIxJV#vdLpd@zOE- z0N%}9C5=T;VuS*$|G6sv(|C2GXcVx>6*`5bP(pdaC))zalnb+>Hd7#sk8bElJKKZf zvz-chFKi?@f|7m&lNDB_fN*a?NY`77B*_FTDpy$)TE3>+(>Utc?csOKZAEO11pzKB zKyh04_|Ho6sKrhmhr-fovbBf!Nlz3~ak|OHB{b}ewcwBM{*q}ZV4X|gJ7RAjtIo_*m;EjQ*nIY zmtFg^c!Lr>CC@;Jm(7L4_q^h#aoN^QtUL#!pE=WM1uWj9Yn-H0e$UH^3TLWRLrXzU z38(RB!e8UGbOUSy!L`g?!9TJvGqe<16RhAL=JC{BUB=y+L<(e}LrFkYwj`hg<>Y?4 zFH1|$1b!ey&#^Ta`LJp+j8)5_yF%4SdvVpj))DG22{@6;!s;$x@P=mCY(=Br zm8Mk=z)4JxyB)VzV;U=uE4K)iZQLFt_dxM*r5^d1-#6LLt1I1_GOW^7Cyq;M+V+%5 z8~r$(t!z(PvN;uFqiW7sFI@}mr{hC1_0DAn@$7ZKlSXUjm+Z<~iY)0CYE`pLb$g}; zN?_Ao&4-gI4)wAI8kkRMjfG`~owgDo>9_5LY$BYQ(;%F?=bhZ_i7x2?iD*4)4ND8X zyui4{A)fEVu^?cU>v5~C@ipLf)C6Da@|^6?DoUEg30D5DB-%l=lx+ZAD;X1Y;u+2e zIPLGbgow@1M0LH39dxnMU-8P1F~MQj_PQa}GV3}~1Z+E|M$ zMLP+kMci0lva(mhL1TLj3IN+QYUzq}T!**$Ox}TY)u~2~N?1dNMc1iRQ6*c|9xQup z+gI9qR>qi+ZOW!gzU@-I)_F%fE&|-pX}0D{&DAEYyZplIzax4R@|_&$3+wY=sKx?- z0Q^GqKQiCH;`%?bAmCT6`*rRA?5#av%KR$}3|HFklFAh2(|K$(EdUXAb#`Wn=X3aDZ1=O2Q$x9jwPXhs4pnfSR;R8@8IPQz+Y^f z&8#cd<@Xn|^J((wlW}+xF1#ns4JQE&Ub)QApVh6Ru*0>=n5Vr}}8jIfWvF`#`NQz)8 zReA(~ZI6Nv@Uh43c2@qxYQbN{9%ZGC9;}mrYW>dU>7YvYT`ouMRLse?^^4-PdCqDlr;~PY%nfqZr?w}ov2|4V=DeIcSo~-4b@8%g;K+;QO$YB zteLipd2UmAxX7irhA3*@IDh5GSRks2%e*T>ED#QRGb)QZNQXP+6{SGmH@yzKmZsGk zyl)eigtEvmdjm0Yo_y|n<49+st(^6Ghi8x-Vac79thqt zy6q^JdNDy$9m`Lf^mjs1?rE1)?P1h1?&0)$K{6U)hTU7xX(cxgBjZ*Qp=Ku05_MxK zu#}H{zm@9kd1oUP;${$1H03Iy%ATHsocsmx}CS88}dvtk~fTD!RPCKeSZGgB_>G01>hs_ zSLrWnzU35xNkD1$tHoDZDn?q_=on(fg++^P$4CsSfhVONn8;xkrr{-yS}5RA24QxP zUrY*#vQXvwq6Lxf6j<-9z{~@QQiVJ{-$YW)inuta{+^>-%s=B#l|zw5d9QQwCtUv+ zS4$!OUG`p@T5@WfN@Z9=nyN;bahge0fqeA)ly8mH4Rm*Ip(K>VGYQFa3%$0exxS=1 zVyH=Zer!^<4$$85+RAbA5xNn&Nq%9{@?Qk_?>w4GPDQo-#i0u@|4$S!{y{;)TJ#z| zLhvQkJr2Sn9PdE7nw^S7d2vfgX=#Xs{}2Nd2Kums@>Yj|*hpn!x+w_)aH!2;NcPI9 zI+Z=hX}%dWR+OM*4r*cid1VftPk8_&$>eS0?r6Q-G&K7Q=jWT5Go$k*7JzK*bgMt> zrVEFG40BG)w^TTd^^&NC^}YFvlvC;}bM}1Hh(3CQw6P>@$QG|)y%P;6@+fvw(|1&T z0lS#{Hc{sb(YamBj6-LiV<0`pkkAUHSsgHP5{>XZeprX-ssN*}RV;`Y#w@8-4oP6h9e|i0pAuc?Xc-1sb`i`Dn57 z4VUKl)Lk3a{2)uvjSDi9EBwuvQaPq9*GWt_=E^CfVs;lZY4Wol{X!WL+DQOizB7z8 zal$0P&s&1M#f9PbdozpimW?I7Y$yacl<*}?uN`|YR3hc^QuE_ow%_VdsG-Z@>(ihP zJno0_ND`MC=Dns$XU~+sPl10LUgD>(0;7mfV~ zB$76)1L$FbbP9F>MS<^BQOt-TTw(fSofl*ZP8yqZLMwtZ;!9dA_%!c1WX_oJ_(FCH zTQybpm&P7X+{13(9kJczOx=7^X9u-g8CKn9nw=PA} zh8_uHJfeTzsvQpQb2r)er||#-@OXXY_AE1MY^B;zy8U$O~71ib-POldve{ zm`b2<3D3Wl!XthtSA%MNX`c5g3Y1bS#Ap;|NtMA}h8ki}{_G`@a=C2OlEF}G>j zGeO@rF?r*!lgf2vj%UEmC(PhKvKZVBA62rp}1HGf0pk8J~UFqLjDR-;x+FPxBqoJnTI8yeCc8MXUYPx zy8#geG#+WWg$rtE;Nmc@Uf`xTe6})Zr}v5T_xz*K8q`p6{PPhUsIot;5RFw1c&OrU ziLg&!^^1Hn?9438j8eYo_W|(z^Drl0u+X!_bhKUi$F}C;UYV$)+T+!95?7 z*q$l8xKX4Yw%RM4dF|+mKn?XlrEHk*&#lpeiBI?^f~J~6qO#k+_s(vMe)qIiQ~>}9 z2%a-(1Wo5hx~cmicA9)EzLqx2LRq_Y9#h2R8{^yGRq)n8PPj)d*4qFX$h>b5qnLvybYG@x&+Q;A}@~ ze??EN^vN?!bX;6xyo(*5WZGKIu7*Uuk!-#9K@Wyc3Ft6~zk>bgX-Ul6UK`J*_8Cw; zgSzNVn|z@ZH`BHEY>y(#9upr@WH+w^yEvoI?Gw(f1C#I(%l92D!pD)71|`e(G@PBB ziSy_;`eup9S!R8LP=nFPW!v}r7$T%h9YAybp{8m;G`N~HJ}jcJ@9SVIVjY37WL zXDH_9TuD8PWOdo#>q~Ca1>^fh+z(eZjdxAdUxM;)E>PqHG9h>mSE%!aP|G;K9$C4~ z>@@t6Q#7y(lXJ%ZHEAlZ{lyUPsQutDy$wnfvpK|Dei*Y^ttC0uudgeR$^9e>x53e} zo|Z#oSHB&wk1Y5eS-O1QK^~=sdv#wfY>+)O$LODo)cG>_J}UQ^Bb%O)25ltNZUIX! zhn2|yvH`s28Del*VuFKaxS1r=XDTBhq6Hu|al*uKh6gzDLnzAub*aJ-1(=d78Kk)G zr)8A4g!5;Pv$TIMm^1UBO>hgxMZoS2KRUKPe*cB}TO;qe#_@t6>W9T*%PduBmgwVH zGJ zwJdi0atZ80wAFoB<9i#YzL8LdK7peb!Qg2G@o~uenj3K)(~7u?qC{VH zb(@1vsD}464OzX$BR5t_UN$Wulri+Re1wqlnP|DumHhJs- z==OGcoFZxNJpY5-aw(v~8(7yIoz498vw|16yAG4vY_Oo5$&~rsImJLK=WO;@+Z>cR zYiE8}V;{#c^Q+#*tPOwa$IAs%G*_NpU+*`$V4JgT5AWs6dFpmRqo+f-PTrG--nbyP zoY0wlPxKAThP4Uojv#7wF2`Q3iNYmCroq$08{+Fa8mIK z#oBJB&|U4)W?>;p#o#4eNxNPWm{&an{^Nyr+gBUvXm|)(2@CCR;R_fuUhzo~Y2_sk z0F4f${R6hGcGr*lh@(%p>j8AStWRj18KZng<)n22nIxkYypNBe&-Z(mmqwt&2rH*NV|2=g;KY^qnvBM@sZIsWY7)#?todM7?NY6zem07saaq}ht!soOM7L| zK^L34K{ajmhNG50OiK+6@dI1q1PGIbKcOyZN-mLVD=6aFdr;`6YSi*5MN>7p)eabW zWVt(8;mB!tHZnCx*PEMpHoVGxGisMqM;%=iwsx8Q!;W`Q!vsQAydu!ri1ltOjg__3 zx69AY`jw_68|bt(|HH-{yAFS^SpULey_|Lm4W7blDwbOY zn6#`!v+vfgzGfiFFK)U(&`l`I!}vW5HpStSgc*W9?$eB$6$Gjd7T)?@IR4 zO=i3my9T^dtC@ve`zEA`YN&hOz*w>??DeHe!Pbp80uweu8`WBADyPA|Nga@3vxcl; zX#v6(Oix%ytqu>~YL3{{s~Ycv%3Os2=9*RA9&G+i_mxYz2isOT zE*IurBS%IN0V)QxWL=n-I%0~lX`vg~uIfd4*-4a5bOB0P31n5q}Bd(jU?#Ds^Ir4s5 zS$)}l-sxSnm_+q3BvWIiVJ2deeqs5t-QeqX%6)n8c6>}LZvIcBov`2rI^ zgdl^cO3ig{47r{<%gBQ|*rdp-bCmquIE?hRg;ugo=_7?2AydbxV_KQxFl5M$r5lMq zJq=xDS!G)Ne8u0rZcL;Ov<-D~blKv*l&1aXGIpJHkhH~&h^Cg6%)C>Dwecd{Sk`L3 zo&iPkY6D^Y2x#yu2iN$7J>7mP)lbz`+aMKx=Z>wHC~YqURxs4A4GiUzhE^RjHuna} zs+nTYJkf3V*PSDNSrNPG;Z9GCJ~IT(qbr5N`-3h;?Ow5?Fb@T5_n!jG*ugx@_33+?N4Tx@+c|vL(L+(yv)BNY z7mrv!wYyl0nb-2X+2KU*f5C5izP9)`VPrZuXm%IuI9-bUZJAGOxXQNr)dJfD0|5A{ z1paD)IXJpm8aez`+vk7Ph->sPUU=0$2zM8w&U@65mgv%oz@Ux#{9L+K;WT7|P7K5a zPMl|3VkX!`w1B~+EHm>h7HEOX_G82iCaro<&gNFcPP%EZHBox?wI8u4 z#BhBSafwvxMAU}QbolXUAzH|Z8m#lzEa>a!T-1?v9u3#@6=NDVXd{lzRH zQk<7%9>jD}jjhfmzEX}_xA7f?Tx1S{W5`JOU9c3A;uu}S9V|A&X$XC|Ey?glW)(l{ zis!iw_`l5dXgp5B z%FBL}ktRWZ^{(Ptm*54Ff&+RY-yMO$gAQp%5Hqha0gUDbg~eei1ptOUef0vyMyc0? zpBs25aW^>T4NsT>oYhJxvaq5wy;PKktK>#4cjlkJnppJ7wBB~9$P7Rmo(rFgQc-$R za*y2unx#lZaQT4rtkThKVo~5y7k5!K!oW?S>(zq{gBioEV&ne6s2IZSdC`C8qc%S! z`rwiwT4mPvp?DINSaea&nhgkRT)i?8pJ7vge1+M>{Ej>{7B^(FyeO0wyIkx}XYNuC zbkmW%qshk!;&>eksd3qC`{TXP+E=}Sj6`q?Vlu{0CU1`hUwpv475U<$Idy051-k4> zW^kyu>P@~4jD^q}bZq%|}^y*UFk!66Cc*02RC zeq_PEf$~rfTl*awHqRw8*V3u+qIA$w@pD=89!LhboCfu-}$8B#IN&@A+Zz@3+En14SnbXus&h!j4uKP_0 zX+H;qu*8eb^dU&^>wD~t3ihHPY{^u*&7)uI@+6AoBY9oXRLrsA3cJ|qxgDH z8Y;C086bhu%0(^`nJ=)~Lw+MlOu$gd=sAO<2ue-HP0F#E)v*@%&#~F8HxO z9^xjkcsIA4tMHpOZk$I0u{vXSOfN1`98j&2)r5+jc*RF(b!*$Fm=qKTf5cZ!>#Z(ILj1>diSrT-NFhW2_cfBAy$pCA6A0aUDPzQq3rui0nMP5!|cM9NaPU@386 ziNb>N>H{#h0TOX{oL#!G^5?51u-T$QMa3fBTxaa9)m6eLJgw^Oi`${Yr+De#o><(Bj#W{k6RuOCQpx;U;uGot>tRAV95 z`re5YD>Irrjy+Vgo8cwn!D|^o)dDlkBpX&u8M{SGa#V?LemJ<^pc+9DSd)$>`a?E~KCD>Nm>R3-)J1fQ}?uGk9Ay4Wr!xncw+atA~ z7z@+dRI-!TVyq&}Gg1YQ!5#wPLolu)wCp`l!)apq*?>R`JYz%}4h8eP%wx&eG!tFj zPQOea?}fa-)p-*`l`KO^aQIe%CY+H&(2fDC&868>o07KmBW^@Qh&rM*ABw=(%*|`N zmd&#$m|<2n*hUSE!bZ(IlTvYsCpYeGm00eJ3CByJa!k1^5PTw5lnO={=(S)VtE5s- zK?{1MNyZZ{&DYNgwK%QQ-w91IQKNtbA4ponje8mVp27*V9D8VAz+pB~RCL+;JZUn0 zE(BY*+qvn1QcBz1zcPQ|NWL40J3}}#Aiw-Az+)6o3o3p}+@UITFM)$N@59B^1Jp$1 zbzMLbj%eRcJgdT{c@qx3M(6}EH5=p12T8PqS|LJ3n33c@F;Te8if|=MEyIAW$Vo<*d@Ch4Z4yg}H3Rz`~P0)uT(45)q*B|rT0eJ~zs&OZ>u4Qqn zV@d~u4!Q*YJg0+*ylv|i@4!SziO4O?Y-Rrp^d*ic2*|)^_Me7~d|rQyB)%8wgNhHT z%b(pPflrF$pUh12Xy#=~_v`9f~IwvUn{PUaZ5_`|W9*VmBS#cn4_iJL(B ztht)DUj?tXD}0~VoVWv2g!{?@`X>N!?brhbE+j?_=1pU+B1{TdH~xpyP1}S-NZJFn znGp8S{Y;B++*L~|!asz3Jpuh9^IJ0MaGA2{(9FJ@aBG_|C%dEiGZOj%A5hXrO3^%c z5H^c3NE!4UV%FV|^RN9k`ARJ$0J#KR~M^ z9q_}O^mwt$gwlxKSiZDWpLU}JZuj<>hKu~6SLbb_Gzy#NF4T#>ZN0YOvVb%RY2bjQ zLuBI&rYh9*&R#PKlTk1afoDjGsSNM(L^bQ`^pF;L$i4CdO3XG)&~}uNS5}0APMm@+ z44~sys~+eX&KoUqD_Uz5<5&=C)%Uy`l$N(;vI2i#nTD@2Eaz+T3?t3YKw&vqytggJ zgP2HVlxn@zouay+8Rd!YSHxp=+6^+yxodS+i-vNk_mTk{xS~)U@^6-uACpz{38y~- zkh)Rmu^T{HJY^(D6_};r_a#J0Ow8=F7z?$h;mrC24?Ej$7cd$pH*bWS?ptwV=T5au zF43SjuvZURW9yapZ5!ZoR`#RUQ5*ribtcpm;K;X7 zW3E#&u0B#eu6&EYVuWvdERvD3q=s?t33~8AST0vUw{NMKF^7R6ZhM;uWIog7fV*BH zH>eAgPbLT~;u|3=qKPLwxi11JO^leCeN)lc590Y|fI5VV$8EjMInYV946O?CpFLeN zg=D;xHReGdp}yC9p@O0Sa&&70vz0qw5agmWFyjn(n~9lLIwa5{kJXMK>o47XMP% zv;2DWMiyoOpmVTs$sySArpI=Sd)2GYQRR8n^1}3CeQ5v+FdLaj)uFav6znxP0f5(( z-tijXyA~t%%-Ke^G$KLInVJDZwbPp;+5F;*b{&}YF1Yg~kb+ehZB^P(2@uc+m7`=r zXCW z2~=|yB1RQAVM5F#GV;Bv#t;kBhGcG7l`2)VQGH~-x%Owp&&W$2VyrH^R25e5JaixsUgXNarWfFi^za{7l`9x`m6nN(2R+X$X?rR5&5Ep%=^uH&P{rA>8(J4 zQ)#{w&_3^F*u3Ma%(cFROSsHT0|%S>$+;MdmokA^zMYi94~;%sdMoLWOf!qw`F&=fC& zP*ZLX%}~?32{ghq#GKyHuYbcpr9SyO>LS5?Bc?8*nqqkTO7zI4lxr>cxC z76(M3EJftmX$ahE7qg zWE6jAU&rhtb7q!W*P1%Ctr7BdFPO)Fjmq#lG!P4J$4ZPm%r{edBzXFkYexY*9S?^VD1 z;mJ5Od5}xrd?f4PW3pbTvl?>lg1mZ?A+{tC*pktl^nG)0&Wy8p16}XrL(;7qrrn z9hO%U2MuluK|WgBaC#y?*Qu}{3-)eY!e|Ih@!8yi*ZCcRzI*fCI3r&WdfJb6Y(4aD zl(#|D7$%SF{*ps{s+3di@rEASAA_Ty_C{L}D3AAGvYu;hCU0Zzy>xyZ zaEILSY~uk6_vjV?zlZDfl>OCPNBSa$850KR#Cb+W6Te6vOcClF?c(A1uv9iI7_m)v zvth(Vqs3&1!hy>^fM)m_1%g5PTUJ65@h?5%}r5l~cda+o`?4gdaSa zud(6xP*mP5la(lv0$qQ1z2M3`NRA&CLnH(97KvQlfW{OFkqt=bH$qLw0o~BBz~1W|!=CaCC;4-MXy_}yi}x+S<`UAtK^F-I-9Iu+u`{yC)n#PeO^ zr#=CwQ>JAv$b?Zz7v6XuQx{d6nRNved&2}nCT!-|BK~@lU&uLF=h;HHmPZ}EX=`d2 z0WxF3cKSBn<)Cj*N?)n9dbfWVWNiz6l(Hj$Rq7 zv5lb2#v@|e(GnO{1e-)=RvE%f5S^4+W5IeiLMXyp1R`BRgAy_zLZICP>K;?t1t~4# ztpGVVS$Yl1v5l8(PV)ffQIRZ=dIda|nlZU~c>Ko5%}lw6r=)=||Z z&B#eS)>T=t`VY&2ATZWg7AIF=>!HjE;>mMvfHD_owKY|w_?-n^nAbidV#z==#;4Ik znZs>XtDP#QfMSdefD+&HIC62&SKtet_cp&usmIOTF*xyW^{RliAeykFkm>@Cg6^oY zB>6Xhih$syvu8mst{FI(;h%2@;BX{*v zh8)Nc$tl2_uY3jA#9>|}nVM_ViBi|!%m}i)vPwSp!t!YHRvDjpuJnw?^m_`6jH#H;Q{K3y!gts7@J;KM*c0M2l2H^@3(O zu*Q9s9qY+kQtY7b;^c~K?B$W-RRSu2-Fxbe7ET&5srVMTNO5+{rY_hxCi4>S$*y}6-wr6BjQy+MIx z9;x+3%wcr1mNubs<7VdtY4!Z?yD``(MaA!5BFy@0`T+GG-I)K7S~iw8_6~peEI&cO zy7#LcQ-$;lV26ZXLD*Y(8V*W^A#JHY@_@fqVC#_~XDUR1)Wh|9+V$*1OgEz{p0-Yk z-5?Gpy7shfu^3!k#{Rer1&99z>fw-EsgV>-OW^SHjV%ZQ_Doey$FZGaP0T$XDoN*nZ z!H_#4TK^o{h%qxN6G62RpooC7l5T;nC5juJEsN2WvhCKk<^2G4685k(-1iWZ=D5Fg*}(bLpZfP^jk|mX+R)dtT2`X} zPXB-0_GfnMOheOVjUCM!PxmuG1%e(Pc%XGT-f|w7*j@`IK_-Bd3I0`7s=fkJq!i`n zGS0Y9wuAVnGqy=uV;ldPG%iwDGSSUmi{}wNowv5?cuPSo1t?xyPst@`;!!U7v;EnJ zGtaHUk{QYlsGk2{+BE9*c3+3DeOT-{T--@HA%hb?kS)*itTM4wtV27Y%2XsxQC<(G$Oh(t!f2My{x(*j-eui2?x)#(hmo8X(`F91G^X(?RM>nr`5-GWI4H z`q^JLQO%Kyj6rJ{3fi+rcU&v;avFq7!sRmdpq8)hD)T^IhYzPWKIpcvSlVmXDp=h$ zZj3<3W>$b~@3iM#t|`nKpB)PLfD3#o0$wF(uR@=<^cn|z6w7}Tod^cv>)&{2+**)I zK^#0J?h>$k0J90Xc(L0hz+jguL*y-1U?{~~e9}yMHox(YJj`AGAt?qG6td0i#7kwA zM=PF38wSz={i7b}9?hF3rTJT1BExhLQa#`rASeM}^JFIR{j$|&ld!@P0e6To|M!s# z62&XrZQPMimJ_I^N*!~{5;~F(isN0J^1^2Tp;z8=lS)wpL4R)wp#C~a8zxZ}C6j`)-V*j_ z!6>>efhV2^xb#=Q%m$L$~K#xoD#Sx5IZWG!Tam!S{}_ob5xwPp$Q zBAP;gQcn?CM`{@XL4NedUB&=)m=rUbv6_uUtIb)=RPT@S3uy8bLuWT&Xwi@_?e)oS z=f%uvI8iyCf?#LFmM??xuK?BNvaod6Rve5t%v%e4pkaSi(e#YM1{{qhdboVA(tS$#O z&!HhYQB_c{>R9p?Ihc8nqMTX1yhV(4*ql6y)c%S0AkqWnBIV(VZMd<6Q1~$|o9B0L z_Q;h>4#C+K6=`>xA@DlIEa9#smw=p7BWxN-t+&Cc$rw>3`4L=B)93*Vje~t@6ma^o2$znfZ8Qpe== zIH6;t)E$sJww$zl37Xd*!?(n1=)p$b{gg9P#@n)Nm|6x(uVb3FoC{vH3eRx>TPQ3X z^8r>20(zj?tpENKy$QzBB8C?u%{mlHZq6Xm$j0l_Yp{2`wxP|HWDJNRkszWm(y+GE zW~~!&KjfPyki>1yi7%gyve`XHIWBD3r;=ZN#DUP)pneXVFQt!dOUYt?z0aO}3@mM6$ND0QOc_X!p_9*ddKcW111Pu*k~% zk?pnJT!uPj6Y>2XOQ(P*uV*T#BAq0-+#d8x3EN#jYI6SE{9_-n1fx&aAX%Plg@SO_h3YcD0VQqBa&H*l!f5Rd6Vg3k>N+;l$CoAcXv5G&?7(AUP_#}k7{i>=Mm#zfTI?sQtd4uc0{ zD)V&jH}1zW+c%rN!|zSaEhPCYXY=*zy`Ac*a?madC?BYi z>}xi21JAaoZ7U2>cr z+eDst4C}AS{zyMIg9nYe=O{U7h-VR`h(Cnz98R0Sn!|o^)tL8Nhv_QDsu;Us6FFr} zlH)tg4YlJf;U^v*@OY-Hl#bZTJx879OaYCTI%9yEn(h-Tlk#cK`&4?iBA&Bb& zx`j9=qOXHWGn907CDkfTN~72OokIUhwsJ`CiBzY#L5a87lenE35q2O4bD6fYu{`rSUQgEMfcoXK*`saQWI|B-7C|eO?!OO0*;~!!X3|p302qz zFFwAs-M=fD|DW>v!)X7j{QmPbAE;HO+#O#End&P&e2KsRwtnp&iy{B4V*BH`KT9D4 zVB1l3vQ?I$yT^d$jmn=kWD^ovHXQ>;H2|;UBjD>!`vX z>)T_ezO07{`LbTlzG=f50kKU>cv<-lP%!zur-DWr_3;5sSPr4x4N7A8@pZVeRW`3# ze!E8LC600ZFS;sBUrCU?=qD1 zEKV4kIK#2>J!PKMp;8S6ydH-?kK($x=Z6x;z7+o7IKlVZy5H>AnV2sp;C&sx z=C}UYEcu5E{(Sk1u< zucB0%5qM>r9d=J&18e^t6Nsbk1atpCcl;L`%42KH`@dd`@F=*|V``IONM4ZPAoR=D z!ov$-hdhj9jWLOX)<(yKVq&HykND&Ab!NvET3d@EV$U?kLPHLPCWdwrtaI{GmM$k> z7kkEm7?=tYJ7g%N58REKg*ZYgmy#shuORWRv|%|2NU|?XzvFuba1!UCI$1q>hQy=Q zYx%7W7z|e-XB60t*_lA*lG23B*|w$eP6=~Tz@eGFbDr}?R_VEaOMA`Ho4ShZ&tSJg z{cdZn-{~3%cUu=1eCsY6D;jTV_ZLxpY=0XbW#9*G;$NX*@-@tZ__apb(8fT{-p2N8 zYQe_d=&upKuNQ;>{_oOg0D#E&iND6X0x!keh4wvmn|)BIBuMyqrAr3bfi8C^Bo_QAI^NDG#L)Bx7_r>P8I5Qg6+e8NJF78%OE?FN z*>w#jVJA&7Xa)>!CCg<}xWX;z`o01LG9>2@azIUr*wdvAF1pJ1O~YtAF^J2oYmVb0 zmd2Ir;UbU5-9T$ep2ynY=?$c-N}KJxRPR@srX9aJA^QPXO4;{DFg}iD_QCJ|;P;2Y zBz4vYQyojTC+o2V2T{Hiq!Esr}}l5Lb< zS{X88i@$_EL8K^2SKOrYkJ`2v|k5jNoa)3v2EBmkNN7&>60Co zKe@9^=(*h4>@79dc24>n@|M@)&NAf^*1mv~-74d2s25xrF1S zg3gK!Zzk*ux9<2O{^;wRiUf&D;EIs*+vj}tUDCYe*LIUh=kiT?v_AfI+rsP1{clc- z=C8EB+_u}=3)p+|y?5xWs^t|ty=8l*XRzblM$3L6=6WF^a z?#XMLy={@cC%^AsP!CMDkOa-hqzhaXiv4IcSOmgoEC;Oxc%vGCdRh}qBZz(n%q5Uo z8(deTYeYZ62BB3L*fxNkT>v}J23<4iVHpUm3=B`Xz?vZzV;r48n z9iV`23i_6FgekfLSWN*}+32RAZ&5{<;tX8Hi^VPA4Xfy;pl@|UnBoXr`U*4!ed8Ot z0q9HU5e8@}fDJ$_s7Kd|zVH~KRUg=%2ijeXz6=@N1oV}<2ooGsz$U<(#K?_dbW_k* zHzG_4QU{v?n}7o?c?7p6(M>>KgMcvM9dN}Y&?it65GxYUO+fGbAxzk7f^0%EViFMD z2=tjSgb{B|QH%h218b5(A5ue@@XQw31h6--nt? zQs^NikZg literal 0 HcmV?d00001 diff --git a/tests/test_backend_msexcel.py b/tests/test_backend_msexcel.py new file mode 100644 index 00000000..d13959ed --- /dev/null +++ b/tests/test_backend_msexcel.py @@ -0,0 +1,77 @@ +import json +import os +from pathlib import Path + +from docling.backend.msword_backend import MsWordDocumentBackend +from docling.datamodel.base_models import InputFormat +from docling.datamodel.document import ( + ConversionResult, + InputDocument, + SectionHeaderItem, +) +from docling.document_converter import DocumentConverter + +GENERATE = False + + +def get_xlsx_paths(): + + # Define the directory you want to search + directory = Path("./tests/data/xlsx/") + + # List all PDF files in the directory and its subdirectories + pdf_files = sorted(directory.rglob("*.xlsx")) + return pdf_files + + +def get_converter(): + + converter = DocumentConverter(allowed_formats=[InputFormat.XLSX]) + + return converter + + +def verify_export(pred_text: str, gtfile: str): + + if not os.path.exists(gtfile) or GENERATE: + with open(gtfile, "w") as fw: + fw.write(pred_text) + + return True + + else: + with open(gtfile, "r") as fr: + true_text = fr.read() + + assert pred_text == true_text, "pred_itxt==true_itxt" + return pred_text == true_text + + +def test_e2e_xlsx_conversions(): + + xlsx_paths = get_xlsx_paths() + converter = get_converter() + + for xlsx_path in xlsx_paths: + # print(f"converting {xlsx_path}") + + gt_path = ( + xlsx_path.parent.parent / "groundtruth" / "docling_v2" / xlsx_path.name + ) + + conv_result: ConversionResult = converter.convert(xlsx_path) + + doc: DoclingDocument = conv_result.document + + pred_md: str = doc.export_to_markdown() + assert verify_export(pred_md, str(gt_path) + ".md"), "export to md" + + pred_itxt: str = doc._export_to_indented_text( + max_text_len=70, explicit_tables=False + ) + assert verify_export( + pred_itxt, str(gt_path) + ".itxt" + ), "export to indented-text" + + pred_json: str = json.dumps(doc.export_to_dict(), indent=2) + assert verify_export(pred_json, str(gt_path) + ".json"), "export to json"