diff --git a/docling/backend/msexcel_backend.py b/docling/backend/msexcel_backend.py
new file mode 100644
index 00000000..508b0e8d
--- /dev/null
+++ b/docling/backend/msexcel_backend.py
@@ -0,0 +1,420 @@
+import logging
+from io import BytesIO
+from pathlib import Path
+from typing import Dict, Set, Tuple, Union
+
+from docling_core.types.doc import (
+    DoclingDocument,
+    DocumentOrigin,
+    GroupLabel,
+    ImageRef,
+    TableCell,
+    TableData,
+)
+
+# from lxml import etree
+from openpyxl import Workbook, load_workbook
+from openpyxl.cell.cell import Cell
+from openpyxl.drawing.image import Image
+from openpyxl.worksheet.worksheet import Worksheet
+
+from docling.backend.abstract_backend import DeclarativeDocumentBackend
+from docling.datamodel.base_models import InputFormat
+from docling.datamodel.document import InputDocument
+
+_log = logging.getLogger(__name__)
+
+from typing import Any, List
+
+from pydantic import BaseModel
+
+
+class ExcelCell(BaseModel):
+    """One cell of a detected table, positioned relative to the table origin."""
+
+    # 0-based row/column offsets from the table's top-left cell.
+    row: int
+    col: int
+    # Cell content as text (the backend stores ``str(cell.value)``).
+    text: str
+    # Rows/columns covered; 1 unless the cell lies in a merged range.
+    row_span: int
+    col_span: int
+
+
+class ExcelTable(BaseModel):
+    """A rectangular block of cells detected in a worksheet."""
+
+    num_rows: int
+    num_cols: int
+    data: List[ExcelCell]
+
+
+class MsExcelDocumentBackend(DeclarativeDocumentBackend):
+    """Backend that converts an XLSX workbook into a DoclingDocument.
+
+    Each worksheet becomes a section group, and every compact rectangular
+    block of non-empty cells in a sheet is added to that group as a table.
+    """
+
+    def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
+        """Load the workbook from ``path_or_stream`` with openpyxl.
+
+        Raises:
+            RuntimeError: if loading the workbook fails for any reason.
+        """
+        super().__init__(in_doc, path_or_stream)
+
+        # Initialise the parents for the hierarchy
+        self.max_levels = 10
+
+        self.parents: Dict[int, Any] = {}
+        for i in range(-1, self.max_levels):
+            self.parents[i] = None
+
+        self.workbook: Union[Workbook, None] = None
+        try:
+            if isinstance(self.path_or_stream, BytesIO):
+                self.workbook = load_workbook(filename=self.path_or_stream)
+
+            elif isinstance(self.path_or_stream, Path):
+                self.workbook = load_workbook(filename=str(self.path_or_stream))
+
+            # Valid only if a workbook was actually loaded: an unsupported
+            # ``path_or_stream`` type leaves ``self.workbook`` as None.
+            self.valid = self.workbook is not None
+        except Exception as e:
+            self.valid = False
+
+            raise RuntimeError(
+                f"MsExcelDocumentBackend could not load document with hash {self.document_hash}"
+            ) from e
+
+    def is_valid(self) -> bool:
+        """Return whether a workbook was loaded successfully."""
+        _log.info(f"valid: {self.valid}")
+        return self.valid
+
+    @classmethod
+    def supports_pagination(cls) -> bool:
+        """Report that this backend supports pagination."""
+        return True
+
+    def unload(self):
+        """Close the input stream, if it is one, and drop the reference to it."""
+        if isinstance(self.path_or_stream, BytesIO):
+            self.path_or_stream.close()
+
+        self.path_or_stream = None
+
+    @classmethod
+    def supported_formats(cls) -> Set[InputFormat]:
+        """Return the input formats this backend handles (XLSX only)."""
+        return {InputFormat.XLSX}
+
+    def convert(self) -> DoclingDocument:
+        """Parse the XLSX workbook into a structured document model.
+
+        Raises:
+            RuntimeError: if the backend did not initialise successfully.
+        """
+        origin = DocumentOrigin(
+            filename=self.file.name or "file.xlsx",
+            mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+            binary_hash=self.document_hash,
+        )
+
+        doc = DoclingDocument(name=self.file.stem or "file.xlsx", origin=origin)
+
+        if self.is_valid():
+            doc = self._convert_workbook(doc)
+        else:
+            raise RuntimeError(
+                f"Cannot convert doc with {self.document_hash} because the backend failed to init."
+            )
+
+        return doc
+
+    def _convert_workbook(self, doc: DoclingDocument) -> DoclingDocument:
+        """Add one section group per worksheet to ``doc`` and convert each sheet."""
+        if self.workbook is not None:
+
+            # Iterate over all sheets
+            for sheet_name in self.workbook.sheetnames:
+                _log.info(f"Processing sheet: {sheet_name}")
+
+                # Access the sheet by name
+                sheet = self.workbook[sheet_name]
+
+                # Level-0 parent: tables of this sheet are attached to it.
+                self.parents[0] = doc.add_group(
+                    parent=None,
+                    label=GroupLabel.SECTION,
+                    name=f"sheet: {sheet_name}",
+                )
+
+                doc = self._convert_sheet(doc, sheet)
+        else:
+            _log.error("Workbook is not initialized.")
+
+        return doc
+
+    def _convert_sheet(
+        self, doc: DoclingDocument, sheet: Worksheet
+    ) -> DoclingDocument:
+        """Add the tables and images found in ``sheet`` to ``doc``."""
+        doc = self._find_tables_in_sheet(doc, sheet)
+
+        doc = self._find_images_in_sheet(doc, sheet)
+
+        return doc
+
+    def _find_tables_in_sheet(
+        self, doc: DoclingDocument, sheet: Worksheet
+    ) -> DoclingDocument:
+        """Detect the data tables in ``sheet`` and add each one to ``doc``."""
+        tables = self._find_data_tables(sheet)
+
+        for excel_table in tables:
+            num_rows = excel_table.num_rows
+            num_cols = excel_table.num_cols
+
+            table_data = TableData(
+                num_rows=num_rows,
+                num_cols=num_cols,
+                table_cells=[],
+            )
+
+            for excel_cell in excel_table.data:
+
+                cell = TableCell(
+                    text=excel_cell.text,
+                    row_span=excel_cell.row_span,
+                    col_span=excel_cell.col_span,
+                    start_row_offset_idx=excel_cell.row,
+                    end_row_offset_idx=excel_cell.row + excel_cell.row_span,
+                    start_col_offset_idx=excel_cell.col,
+                    end_col_offset_idx=excel_cell.col + excel_cell.col_span,
+                    col_header=False,
+                    row_header=False,
+                )
+                table_data.table_cells.append(cell)
+
+            doc.add_table(data=table_data, parent=self.parents[0])
+
+        return doc
+
+    def _find_data_tables(self, sheet: Worksheet) -> List[ExcelTable]:
+        """
+        Find all compact rectangular data tables in a sheet.
+        """
+        # _log.info("find_data_tables")
+
+        tables = []  # List to store found tables
+        visited: set[Tuple[int, int]] = set()  # Track already visited cells
+
+        # Iterate over all cells in the sheet
+        for ri, row in enumerate(sheet.iter_rows(values_only=False)):
+            for rj, cell in enumerate(row):
+
+                # Skip empty or already visited cells
+                if cell.value is None or (ri, rj) in visited:
+                    continue
+
+                # If the cell starts a new table, find its bounds
+                table_bounds, visited_cells = self._find_table_bounds(
+                    sheet, ri, rj, visited
+                )
+
+                visited.update(visited_cells)  # Mark these cells as visited
+                tables.append(table_bounds)
+
+        return tables
+
+    def _find_table_bounds(
+        self,
+        sheet: Worksheet,
+        start_row: int,
+        start_col: int,
+        visited: set[Tuple[int, int]],
+    ) -> Tuple[ExcelTable, Set[Tuple[int, int]]]:
+        """
+        Determine the bounds of a compact rectangular table.
+
+        ``start_row`` and ``start_col`` are 0-based; ``visited`` is currently
+        not consulted by this method.
+
+        Returns:
+        - An ExcelTable with the table size and its cells.
+        - A set of visited (0-based) cell coordinates.
+        """
+        _log.info("find_table_bounds")
+
+        max_row = self._find_table_bottom(sheet, start_row, start_col)
+        max_col = self._find_table_right(sheet, start_row, start_col)
+
+        # Collect the data within the bounds
+        data = []
+        visited_cells = set()
+        for ri in range(start_row, max_row + 1):
+            for rj in range(start_col, max_col + 1):
+
+                cell = sheet.cell(row=ri + 1, column=rj + 1)  # 1-based indexing
+
+                # Check if the cell belongs to a merged range
+                row_span = 1
+                col_span = 1
+
+                # _log.info(sheet.merged_cells.ranges)
+                for merged_range in sheet.merged_cells.ranges:
+
+                    if (
+                        merged_range.min_row <= ri + 1
+                        and ri + 1 <= merged_range.max_row
+                        and merged_range.min_col <= rj + 1
+                        and rj + 1 <= merged_range.max_col
+                    ):
+
+                        row_span = merged_range.max_row - merged_range.min_row + 1
+                        col_span = merged_range.max_col - merged_range.min_col + 1
+                        break
+
+                if (ri, rj) not in visited_cells:
+                    # NOTE(review): an empty cell inside the bounds is stored
+                    # as the text "None" -- confirm this is intended.
+                    data.append(
+                        ExcelCell(
+                            row=ri - start_row,
+                            col=rj - start_col,
+                            text=str(cell.value),
+                            row_span=row_span,
+                            col_span=col_span,
+                        )
+                    )
+                    # _log.info(f"cell: {ri}, {rj} -> {ri - start_row}, {rj - start_col}, {row_span}, {col_span}: {str(cell.value)}")
+
+                    # Mark all cells in the span as visited
+                    for span_row in range(ri, ri + row_span):
+                        for span_col in range(rj, rj + col_span):
+                            visited_cells.add((span_row, span_col))
+
+        return (
+            ExcelTable(
+                num_rows=max_row + 1 - start_row,
+                num_cols=max_col + 1 - start_col,
+                data=data,
+            ),
+            visited_cells,
+        )
+
+    def _find_table_bottom(
+        self, sheet: Worksheet, start_row: int, start_col: int
+    ) -> int:
+        """Function to find the bottom boundary of the table"""
+
+        max_row = start_row
+
+        while max_row < sheet.max_row - 1:
+            # Get the cell value or check if it is part of a merged cell
+            cell = sheet.cell(row=max_row + 2, column=start_col + 1)
+
+            # Check if the cell is part of a merged range
+            merged_range = next(
+                (mr for mr in sheet.merged_cells.ranges if cell.coordinate in mr),
+                None,
+            )
+
+            if cell.value is None and not merged_range:
+                break  # Stop if the cell is empty and not merged
+
+            # Expand max_row to include the merged range if applicable
+            if merged_range:
+                max_row = max(max_row, merged_range.max_row - 1)
+            else:
+                max_row += 1
+
+        return max_row
+
+    def _find_table_right(
+        self, sheet: Worksheet, start_row: int, start_col: int
+    ) -> int:
+        """Function to find the right boundary of the table"""
+
+        max_col = start_col
+
+        while max_col < sheet.max_column - 1:
+            # Get the cell value or check if it is part of a merged cell
+            cell = sheet.cell(row=start_row + 1, column=max_col + 2)
+
+            # Check if the cell is part of a merged range
+            merged_range = next(
+                (mr for mr in sheet.merged_cells.ranges if cell.coordinate in mr),
+                None,
+            )
+
+            if cell.value is None and not merged_range:
+                break  # Stop if the cell is empty and not merged
+
+            # Expand max_col to include the merged range if applicable
+            if merged_range:
+                max_col = max(max_col, merged_range.max_col - 1)
+            else:
+                max_col += 1
+
+        return max_col
+
+    def _find_images_in_sheet(
+        self, doc: DoclingDocument, sheet: Worksheet
+    ) -> DoclingDocument:
+        """Placeholder for image and chart extraction; returns ``doc`` unchanged.
+
+        The extraction code below is disabled (kept as string literals), see
+        the FIXME notes.
+        """
+
+        # FIXME: mypy does not agree with _images ...
+        """
+        # Iterate over images in the sheet
+        for idx, image in enumerate(sheet._images):  # Access embedded images
+
+            image_bytes = BytesIO(image.ref.blob)
+            pil_image = Image.open(image_bytes)
+
+            doc.add_picture(
+                parent=self.parents[0],
+                image=ImageRef.from_pil(image=pil_image, dpi=72),
+                caption=None,
+            )
+        """
+
+        # FIXME: mypy does not agree with _charts ...
+        """
+        for idx, chart in enumerate(sheet._charts):  # Access embedded charts
+            chart_path = f"chart_{idx + 1}.png"
+            _log.info(
+                f"Chart found, but dynamic rendering is required for: {chart_path}"
+            )
+
+            _log.info(f"Chart {idx + 1}:")
+
+            # Chart type
+            _log.info(f"Type: {type(chart).__name__}")
+
+            # Title
+            if chart.title:
+                _log.info(f"Title: {chart.title}")
+            else:
+                _log.info("No title")
+
+            # Data series
+            for series in chart.series:
+                _log.info(" => series ...")
+                _log.info(f"Data Series: {series.title}")
+                _log.info(f"Values: {series.values}")
+                _log.info(f"Categories: {series.categories}")
+
+            # Position
+            # _log.info(f"Anchor Cell: {chart.anchor}")
+        """
+
+        return doc
diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py
index d06b6097..311d6d01 100644
--- a/docling/datamodel/base_models.py
+++ b/docling/datamodel/base_models.py
@@ -32,6 +32,7 @@ class InputFormat(str, Enum):
     PDF = "pdf"
     ASCIIDOC = "asciidoc"
     MD = "md"
+    XLSX = "xlsx"
 
 
 class OutputFormat(str, Enum):
@@ -49,6 +50,7 @@ class OutputFormat(str, Enum):
     InputFormat.HTML: ["html", "htm", "xhtml"],
     InputFormat.IMAGE: ["jpg", "jpeg", "png", "tif", "tiff", "bmp"],
     InputFormat.ASCIIDOC: ["adoc", "asciidoc", "asc"],
+    InputFormat.XLSX: ["xlsx"],
 }
 
 FormatToMimeType: Dict[InputFormat, List[str]] = {
@@ -72,7 +74,11 @@ class OutputFormat(str, Enum):
     InputFormat.PDF: ["application/pdf"],
     InputFormat.ASCIIDOC: ["text/asciidoc"],
     InputFormat.MD: ["text/markdown", "text/x-markdown"],
+    InputFormat.XLSX: [
+        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+    ],
 }
+
 MimeTypeToFormat = {
     mime: fmt for fmt, mimes in FormatToMimeType.items() for mime in mimes
 }
diff --git a/docling/document_converter.py b/docling/document_converter.py
index f2d29e62..4e436d07 100644
--- a/docling/document_converter.py
+++ b/docling/document_converter.py
@@ -12,6 +12,7 @@
 from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
 from docling.backend.html_backend
import HTMLDocumentBackend from docling.backend.md_backend import MarkdownDocumentBackend +from docling.backend.msexcel_backend import MsExcelDocumentBackend from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend from docling.backend.msword_backend import MsWordDocumentBackend from docling.datamodel.base_models import ConversionStatus, DocumentStream, InputFormat @@ -44,6 +45,11 @@ def set_optional_field_default(self) -> "FormatOption": return self +class ExcelFormatOption(FormatOption): + pipeline_cls: Type = SimplePipeline + backend: Type[AbstractDocumentBackend] = MsExcelDocumentBackend + + class WordFormatOption(FormatOption): pipeline_cls: Type = SimplePipeline backend: Type[AbstractDocumentBackend] = MsWordDocumentBackend @@ -80,6 +86,9 @@ class ImageFormatOption(FormatOption): _format_to_default_options = { + InputFormat.XLSX: FormatOption( + pipeline_cls=SimplePipeline, backend=MsExcelDocumentBackend + ), InputFormat.DOCX: FormatOption( pipeline_cls=SimplePipeline, backend=MsWordDocumentBackend ), diff --git a/poetry.lock b/poetry.lock index c7e040d8..439f9136 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 
[[package]] name = "aiohappyeyeballs" @@ -13,87 +13,87 @@ files = [ [[package]] name = "aiohttp" -version = "3.11.2" +version = "3.11.4" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.9" files = [ - {file = "aiohttp-3.11.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:783741f534c14957fbe657d62a34b947ec06db23d45a2fd4a8aeb73d9c84d7e6"}, - {file = "aiohttp-3.11.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:435f7a08d8aa42371a94e7c141205a9cb092ba551084b5e0c57492e6673601a3"}, - {file = "aiohttp-3.11.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c681f34e2814bc6e1eef49752b338061b94a42c92734d0be9513447d3f83718c"}, - {file = "aiohttp-3.11.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:73a664478ae1ea011b5a710fb100b115ca8b2146864fa0ce4143ff944df714b8"}, - {file = "aiohttp-3.11.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1d06c8fd8b453c3e553c956bd3b8395100401060430572174bb7876dd95ad49"}, - {file = "aiohttp-3.11.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b1f4844909321ef2c1cee50ddeccbd6018cd8c8d1ddddda3f553e94a5859497"}, - {file = "aiohttp-3.11.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdc6f8dce09281ae534eaf08a54f0d38612398375f28dad733a8885f3bf9b978"}, - {file = "aiohttp-3.11.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d2d942421cf3a1d1eceae8fa192f1fbfb74eb9d3e207d35ad2696bd2ce2c987c"}, - {file = "aiohttp-3.11.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:08ebe7a1d6c1e5ca766d68407280d69658f5f98821c2ba6c41c63cabfed159af"}, - {file = "aiohttp-3.11.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:2793d3297f3e49015140e6d3ea26142c967e07998e2fb00b6ee8d041138fbc4e"}, - {file = "aiohttp-3.11.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = 
"sha256:4a23475d8d5c56e447b7752a1e2ac267c1f723f765e406c81feddcd16cdc97bc"}, - {file = "aiohttp-3.11.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:556564d89e2f4a6e8fe000894c03e4e84cf0b6cfa5674e425db122633ee244d1"}, - {file = "aiohttp-3.11.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:57993f406ce3f114b2a6756d7809be3ffd0cc40f33e8f8b9a4aa1b027fd4e3eb"}, - {file = "aiohttp-3.11.2-cp310-cp310-win32.whl", hash = "sha256:177b000efaf8d2f7012c649e8aee5b0bf488677b1162be5e7511aa4f9d567607"}, - {file = "aiohttp-3.11.2-cp310-cp310-win_amd64.whl", hash = "sha256:ff5d22eece44528023254b595c670dfcf9733ac6af74c4b6cb4f6a784dc3870c"}, - {file = "aiohttp-3.11.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:50e0aee4adc9abcd2109c618a8d1b2c93b85ac277b24a003ab147d91e068b06d"}, - {file = "aiohttp-3.11.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9aa4e68f1e4f303971ec42976fb170204fb5092de199034b57199a1747e78a2d"}, - {file = "aiohttp-3.11.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d84930b4145991214602372edd7305fc76b700220db79ac0dd57d3afd0f0a1ca"}, - {file = "aiohttp-3.11.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4ec8afd362356b8798c8caa806e91deb3f0602d8ffae8e91d2d3ced2a90c35e"}, - {file = "aiohttp-3.11.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fb0544a0e8294a5a5e20d3cacdaaa9a911d7c0a9150f5264aef36e7d8fdfa07e"}, - {file = "aiohttp-3.11.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7b0a1618060e3f5aa73d3526ca2108a16a1b6bf86612cd0bb2ddcbef9879d06"}, - {file = "aiohttp-3.11.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d878a0186023ac391861958035174d0486f3259cabf8fd94e591985468da3ea"}, - {file = "aiohttp-3.11.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e33a7eddcd07545ccf5c3ab230f60314a17dc33e285475e8405e26e21f02660"}, - {file = 
"aiohttp-3.11.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4d7fad8c456d180a6d2f44c41cfab4b80e2e81451815825097db48b8293f59d5"}, - {file = "aiohttp-3.11.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8d954ba0eae7f33884d27dc00629ca4389d249eb8d26ca07c30911257cae8c96"}, - {file = "aiohttp-3.11.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:afa55e863224e664a782effa62245df73fdfc55aee539bed6efacf35f6d4e4b7"}, - {file = "aiohttp-3.11.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:10a5f91c319d9d4afba812f72984816b5fcd20742232ff7ecc1610ffbf3fc64d"}, - {file = "aiohttp-3.11.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6e8e19a80ba194db5c06915a9df23c0c06e0e9ca9a4db9386a6056cca555a027"}, - {file = "aiohttp-3.11.2-cp311-cp311-win32.whl", hash = "sha256:9c8d1db4f65bbc9d75b7b271d68fb996f1c8c81a525263862477d93611856c2d"}, - {file = "aiohttp-3.11.2-cp311-cp311-win_amd64.whl", hash = "sha256:2adb967454e10e69478ba4a8d8afbba48a7c7a8619216b7c807f8481cc66ddfb"}, - {file = "aiohttp-3.11.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f833a80d9de9307d736b6af58c235b17ef7f90ebea7b9c49cd274dec7a66a2f1"}, - {file = "aiohttp-3.11.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:382f853516664d2ebfc75dc01da4a10fdef5edcb335fe7b45cf471ce758ecb18"}, - {file = "aiohttp-3.11.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d3a2bcf6c81639a165da93469e1e0aff67c956721f3fa9c0560f07dd1e505116"}, - {file = "aiohttp-3.11.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de3b4d5fb5d69749104b880a157f38baeea7765c93d9cd3837cedd5b84729e10"}, - {file = "aiohttp-3.11.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0a90a0dc4b054b5af299a900bf950fe8f9e3e54322bc405005f30aa5cacc5c98"}, - {file = "aiohttp-3.11.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32334f35824811dd20a12cc90825d000e6b50faaeaa71408d42269151a66140d"}, - {file = 
"aiohttp-3.11.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cba0b8d25aa2d450762f3dd6df85498f5e7c3ad0ddeb516ef2b03510f0eea32"}, - {file = "aiohttp-3.11.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bbb2dbc2701ab7e9307ca3a8fa4999c5b28246968e0a0202a5afabf48a42e22"}, - {file = "aiohttp-3.11.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:97fba98fc5d9ccd3d33909e898d00f2494d6a9eec7cbda3d030632e2c8bb4d00"}, - {file = "aiohttp-3.11.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0ebdf5087e2ce903d8220cc45dcece90c2199ae4395fd83ca616fcc81010db2c"}, - {file = "aiohttp-3.11.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:122768e3ae9ce74f981b46edefea9c6e5a40aea38aba3ac50168e6370459bf20"}, - {file = "aiohttp-3.11.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5587da333b7d280a312715b843d43e734652aa382cba824a84a67c81f75b338b"}, - {file = "aiohttp-3.11.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:85de9904bc360fd29a98885d2bfcbd4e02ab33c53353cb70607f2bea2cb92468"}, - {file = "aiohttp-3.11.2-cp312-cp312-win32.whl", hash = "sha256:b470de64d17156c37e91effc109d3b032b39867000e2c126732fe01d034441f9"}, - {file = "aiohttp-3.11.2-cp312-cp312-win_amd64.whl", hash = "sha256:3f617a48b70f4843d54f52440ea1e58da6bdab07b391a3a6aed8d3b311a4cc04"}, - {file = "aiohttp-3.11.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5d90b5a3b0f32a5fecf5dd83d828713986c019585f5cddf40d288ff77f366615"}, - {file = "aiohttp-3.11.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d23854e5867650d40cba54d49956aad8081452aa80b2cf0d8c310633f4f48510"}, - {file = "aiohttp-3.11.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:486273d3b5af75a80c31c311988931bdd2a4b96a74d5c7f422bad948f99988ef"}, - {file = "aiohttp-3.11.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9075313f8e41b481e4cb10af405054564b0247dc335db5398ed05f8ec38787e2"}, - {file = 
"aiohttp-3.11.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:44b69c69c194ffacbc50165911cf023a4b1b06422d1e1199d3aea82eac17004e"}, - {file = "aiohttp-3.11.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b339d91ac9060bd6ecdc595a82dc151045e5d74f566e0864ef3f2ba0887fec42"}, - {file = "aiohttp-3.11.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64e8f5178958a9954043bc8cd10a5ae97352c3f2fc99aa01f2aebb0026010910"}, - {file = "aiohttp-3.11.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3129151378f858cdc4a0a4df355c9a0d060ab49e2eea7e62e9f085bac100551b"}, - {file = "aiohttp-3.11.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:14eb6c628432720e41b4fab1ada879d56cfe7034159849e083eb536b4c2afa99"}, - {file = "aiohttp-3.11.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e57a10aacedcf24666f4c90d03e599f71d172d1c5e00dcf48205c445806745b0"}, - {file = "aiohttp-3.11.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:66e58a2e8c7609a3545c4b38fb8b01a6b8346c4862e529534f7674c5265a97b8"}, - {file = "aiohttp-3.11.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:9b6d15adc9768ff167614ca853f7eeb6ee5f1d55d5660e3af85ce6744fed2b82"}, - {file = "aiohttp-3.11.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2914061f5ca573f990ec14191e6998752fa8fe50d518e3405410353c3f44aa5d"}, - {file = "aiohttp-3.11.2-cp313-cp313-win32.whl", hash = "sha256:1c2496182e577042e0e07a328d91c949da9e77a2047c7291071e734cd7a6e780"}, - {file = "aiohttp-3.11.2-cp313-cp313-win_amd64.whl", hash = "sha256:cccb2937bece1310c5c0163d0406aba170a2e5fb1f0444d7b0e7fdc9bd6bb713"}, - {file = "aiohttp-3.11.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:994cb893936dd2e1803655ae8667a45066bfd53360b148e22b4e3325cc5ea7a3"}, - {file = "aiohttp-3.11.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:3666c750b73ce463a413692e3a57c60f7089e2d9116a2aa5a0f0eaf2ae325148"}, - {file = "aiohttp-3.11.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6ad9a7d2a3a0f235184426425f80bd3b26c66b24fd5fddecde66be30c01ebe6e"}, - {file = "aiohttp-3.11.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c979fc92aba66730b66099cd5becb42d869a26c0011119bc1c2478408a8bf7a"}, - {file = "aiohttp-3.11.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:766d0ebf8703d28f854f945982aa09224d5a27a29594c70d921c43c3930fe7ac"}, - {file = "aiohttp-3.11.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:79efd1ee3827b2f16797e14b1e45021206c3271249b4d0025014466d416d7413"}, - {file = "aiohttp-3.11.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d6e069b882c1fdcbe5577dc4be372eda705180197140577a4cddb648c29d22e"}, - {file = "aiohttp-3.11.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5e9a766c346b2ed7e88937919d84ed64b4ef489dad1d8939f806ee52901dc142"}, - {file = "aiohttp-3.11.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2b02a68b9445c70d7f5c8b578c5f5e5866b1d67ca23eb9e8bc8658ae9e3e2c74"}, - {file = "aiohttp-3.11.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:374baefcb1b6275f350da605951f5f02487a9bc84a574a7d5b696439fabd49a3"}, - {file = "aiohttp-3.11.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:d2f991c18132f3e505c108147925372ffe4549173b7c258cf227df1c5977a635"}, - {file = "aiohttp-3.11.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:34f37c59b12bc3afc52bab6fcd9cd3be82ff01c4598a84cbea934ccb3a9c54a0"}, - {file = "aiohttp-3.11.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:33af11eca7bb0f5c6ffaf5e7d9d2336c2448f9c6279b93abdd6f3c35f9ee321f"}, - {file = "aiohttp-3.11.2-cp39-cp39-win32.whl", hash = "sha256:83a70e22e0f6222effe7f29fdeba6c6023f9595e59a0479edacfbd7de4b77bb7"}, - {file = 
"aiohttp-3.11.2-cp39-cp39-win_amd64.whl", hash = "sha256:c28c1677ea33ccb8b14330560094cc44d3ff4fad617a544fd18beb90403fe0f1"}, - {file = "aiohttp-3.11.2.tar.gz", hash = "sha256:68d1f46f9387db3785508f5225d3acbc5825ca13d9c29f2b5cce203d5863eb79"}, + {file = "aiohttp-3.11.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a60f8206818e3582c999c999c799ab068e14f1870ade47d1fe8536dbfd88010b"}, + {file = "aiohttp-3.11.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e5786e5926f888ce3a996d38d9c9b8f9306f399edb1f1ca3ce7760dab9b1043c"}, + {file = "aiohttp-3.11.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:262e45dbd7f1829bcb024259f65b2cf69d1ef5b37626af6955a1c487613aeb3a"}, + {file = "aiohttp-3.11.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:696adff3594bd449e0fe287441062bdc6f5300928426275b39ed27884ba083a7"}, + {file = "aiohttp-3.11.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6dd1411ecfc070af4df129e81fe42c799d95d81c29c22d2c3e4341d974c38f1a"}, + {file = "aiohttp-3.11.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:06defa9017ab50d215446ebbee294e07eb2fcee72d9a909a08192cfacbd43a08"}, + {file = "aiohttp-3.11.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4bc936d10b8fa3f2aa66e59e034085208b588442263400ddb042703d0db99421"}, + {file = "aiohttp-3.11.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:769457243dc4bc902d376cd14c5c7ec234a4faadb4f283dc2738f004cce9a9e1"}, + {file = "aiohttp-3.11.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7a360c18b2cb391fec9585ba1efc55150e2fbc6100308113117dfea521e810d8"}, + {file = "aiohttp-3.11.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3e9fd9c11299d6d230eb2669fd1ed0238d33970e36b495b0432ace7f157fc931"}, + {file = "aiohttp-3.11.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = 
"sha256:0ccbe8ece8a7796ef41b86a3240034c5918d9b324c2ae48fa0be33565e297c64"}, + {file = "aiohttp-3.11.4-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:9a8b6b3c788a8a6f88f5ce23d729cfde7a2ccebbeb09db0822ef266de0445a27"}, + {file = "aiohttp-3.11.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cbe3e356523d0b336543996f92a0e65f760be82447db21c95c60392c8075ff5c"}, + {file = "aiohttp-3.11.4-cp310-cp310-win32.whl", hash = "sha256:a54424050d1eb36edfef913b1bc8552d52a37864c0ea7df3e1e764663e11053a"}, + {file = "aiohttp-3.11.4-cp310-cp310-win_amd64.whl", hash = "sha256:a51f983d91edae7777b5a2af8e5d83224ba01284502c6874a17647ad6cbf0211"}, + {file = "aiohttp-3.11.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:89261fee04715083ef3b5a0d222b094021793c1728b8ff21da361c79f6384095"}, + {file = "aiohttp-3.11.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4ef6eb1367046fb22085f10c5c84ea2efd0d836ad8088306d652ab1d743faf9e"}, + {file = "aiohttp-3.11.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d68bb99bc6a4b0a3eceb95a246f5a0262e600e094b5178c2b1ab0f4bcbae6729"}, + {file = "aiohttp-3.11.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a550b4ff70d06c15057d75ddad89a3e7c496e0609d28c567c20b61cd1265c0a6"}, + {file = "aiohttp-3.11.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9b41e0fb3b415beccd6d0c6e5f3ee34b7952cd76120a1db3e45507b83dc5ef81"}, + {file = "aiohttp-3.11.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8feffa8396724116be5bc05bf4fcba0c738cbe908c82a95f71371e32b28cd2ca"}, + {file = "aiohttp-3.11.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1dd5b7947e23a08c70d4c1924809b91211f14136ffd13d303dc487913cfebfeb"}, + {file = "aiohttp-3.11.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ab5c6a521b156edef13a57a6d524903c547573ff8101e3d1bbe9ee1b97267973"}, + {file = 
"aiohttp-3.11.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:010bc9606f798eda8ef071759c7b163893071502bcaedc7d5dc49f9d8f12e553"}, + {file = "aiohttp-3.11.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e7d182164aebad4e2faf2742ee7486d4af73d933461adbd8f183ac9b1837323c"}, + {file = "aiohttp-3.11.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:88e681c0d17bb285d2ccbb73ae77ef86339b632ee7967044c2284411120b9730"}, + {file = "aiohttp-3.11.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0d2cea21ec05b832e9f6a2390b23d32ce5575f6cf4812bd171d4493f59c101fe"}, + {file = "aiohttp-3.11.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:635397b5b4de2397f8136f8fd15c8ebee560e36473195c7aa992ffb8e46acdd3"}, + {file = "aiohttp-3.11.4-cp311-cp311-win32.whl", hash = "sha256:cb2d5a24586b508f658ddd710f7d4b7e4f5656cb5d569aeb1f432c1c3704347a"}, + {file = "aiohttp-3.11.4-cp311-cp311-win_amd64.whl", hash = "sha256:ee081375d10fa2f3f7b0d050c8b9c1ae23190e1d9be256035bf8a41059c4df3a"}, + {file = "aiohttp-3.11.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:5cd60673be31449c63f59886f3581478bbdfaddd87e7394a4d73ad134d9be9b9"}, + {file = "aiohttp-3.11.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4ff6105856ae688b29d5daaede1256f5e02e9d5cb3059f8f5ef55d975c2e6992"}, + {file = "aiohttp-3.11.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b169507c98b924fd68b82ae366c285daf6d22456835294c329c3226d61e1f69d"}, + {file = "aiohttp-3.11.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec84106c8b7ff347be06bf579c298a23b6d1d2225c57273a8cd502f257125d4"}, + {file = "aiohttp-3.11.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:03d53b0888f984f4f0c216a37577ee7e7b1ed1dac89cdd2fde61bf2ccb32009b"}, + {file = "aiohttp-3.11.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:822dedad93947fcb1096cc41ee8fd32e9f652777561a37c740e5335699f01cea"}, + {file = 
"aiohttp-3.11.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aef239c307f3a3f830933d612c0aef4ad4b3aa9ce5233a0954262a00f5c379f1"}, + {file = "aiohttp-3.11.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49eb5a0338f141ef32299d48f1415486f47953d37b0c7fa6d778b73b66f3a7e2"}, + {file = "aiohttp-3.11.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7be4efe60e9bddf78ee165a296e80170147282081e1366f0580cf4cc0fb1182f"}, + {file = "aiohttp-3.11.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:66e83a9a1131f0060aaedcc57f1a7e489898b6c3607eededccc7a9f80b95bdb4"}, + {file = "aiohttp-3.11.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a7986fb988314fd2225c1ecab45fd457e1f2c097dcc3c0aacd2a7aec7486beb6"}, + {file = "aiohttp-3.11.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a34c30e1461da3a69c5bdcfce44418b6f969e1e68ebf367edfa5eaab380abf7a"}, + {file = "aiohttp-3.11.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cb4c676ab99ca2dd231928d481e19cd540155dff36e70e613179c4927bd520b8"}, + {file = "aiohttp-3.11.4-cp312-cp312-win32.whl", hash = "sha256:d40d9a740053cb7fef72442fa7bd699060ff4c710971ebdb8dd7c8b36417570f"}, + {file = "aiohttp-3.11.4-cp312-cp312-win_amd64.whl", hash = "sha256:365df6cf2ad144479ba0e0b58abdc5276923676d34da4c1c45613a80d2aac130"}, + {file = "aiohttp-3.11.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f307632f3eaa676f8c2f5df11e4c00ad47dfa79b06cb2fa39156a4e9c6821bdb"}, + {file = "aiohttp-3.11.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:cc2d64b1747efa183ced57b6bce53c9ea8e16e53419e389051b2a214ad0ed051"}, + {file = "aiohttp-3.11.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f37ece590451ecffc815f2eb41f07191d1a31a0404361d1ae2ed532e05c86da4"}, + {file = "aiohttp-3.11.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b10b316413c80a4dcc5228c092a8d019e4b75d4efbca8988cb5b67ae9fa56881"}, + {file = 
"aiohttp-3.11.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:beaed1b2d03033dd301a7b67430f03c8255d6856a269c20995a0292de596519e"}, + {file = "aiohttp-3.11.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:518578d6821c942362daa14a56f26b739abeede6e408b0b83e27dfcde17730f7"}, + {file = "aiohttp-3.11.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1e09bc44a1abbd96f55d15330d6cab80459cb8b06a0b656efd712ce47a3710d"}, + {file = "aiohttp-3.11.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ae8480148d696dae49126e97568333fc01493069ad46a94b82f69c7a33197ea"}, + {file = "aiohttp-3.11.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b71aab89800fa2eaeb28923ee05e7e56c28dab4ebdba524db06e963431bf6192"}, + {file = "aiohttp-3.11.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:821c9c640d3dc456c6a7b005e38bc5af05326b6a08ce91a068719934d108a1bb"}, + {file = "aiohttp-3.11.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d60255f3ed71aa14a2e75383543ca31bd362fdc7f0d2eafc060d85a9051598df"}, + {file = "aiohttp-3.11.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:9788781f57fb732426ae74b9955b899e677ce42b848e60a11be29358fb20c976"}, + {file = "aiohttp-3.11.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:94acecf2eee13a45f627ed25a28f5a7f2db66b90ff94cd7a1e9cc1ad32cddd43"}, + {file = "aiohttp-3.11.4-cp313-cp313-win32.whl", hash = "sha256:d0fd6510c6d67d08ec80d9ba10cd340a8cfb0dd33436c858ed38d4564abb27c7"}, + {file = "aiohttp-3.11.4-cp313-cp313-win_amd64.whl", hash = "sha256:474f7266a61d1c3218ef4ec0325747884b2d5a13fab5bff5dd3b55d9c849406a"}, + {file = "aiohttp-3.11.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cfe8646a24856624c1eb7649da99333f0d7e75d9cf7c155ea870957d24b7c63c"}, + {file = "aiohttp-3.11.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:e69d9869df50dd591228c62fbb3923d6124517d6bfc47a804492813888b497be"}, + {file = "aiohttp-3.11.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb4f1fe110332651c00d2df160978cf1be70896ed9e612ff7c7e67955091b2c4"}, + {file = "aiohttp-3.11.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d97668595bf03299148ea968fed2195cc76ad063aeec8161731aa6a5dbc2f675"}, + {file = "aiohttp-3.11.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4c0b3378dc294ad6ec6c038ed57164165e0b83ef5f61eee72f6eefccd7df34b8"}, + {file = "aiohttp-3.11.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e0898a77298dc24eef368511d98e551e0b2db293fa9b40c982f4d5ab4d8d2a3a"}, + {file = "aiohttp-3.11.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ecdf43ddde709c336a655c8b3858c56af8f7402de2572001a5a99f7bebf2f78"}, + {file = "aiohttp-3.11.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12bf9c139dfa004b65d2d71906abc593dcafe78a508f33d56c1ca9d87b18337f"}, + {file = "aiohttp-3.11.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2d978a95e4b58ef1fd937fbe347ab397c79ba24e17912595b54faafb88b9b937"}, + {file = "aiohttp-3.11.4-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1e32517c01905e0f4e665c3f3a495868ad996a32c243fcd917587d740253d589"}, + {file = "aiohttp-3.11.4-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:4275160583df18158e0d6789797ad314a14ae611b98933fbe7d7a1c3dcc6bad4"}, + {file = "aiohttp-3.11.4-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:1ff7afc3c461bd9217e2b8a90ddbe5edd94687d5a331c4ae6166dca5876d1a4b"}, + {file = "aiohttp-3.11.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:83bd5aa621b732a0ca1aa3490abd2b010247c4677371a804431935aeedf26e74"}, + {file = "aiohttp-3.11.4-cp39-cp39-win32.whl", hash = "sha256:542a4610571b0affc6e13dda9357235f5f1f2ad9859acc69b188eb53901292d6"}, + {file = 
"aiohttp-3.11.4-cp39-cp39-win_amd64.whl", hash = "sha256:a468b1b9d5499cbfd0411f5d28adbe651c90508540fdaefb4b7a2171a837a88d"}, + {file = "aiohttp-3.11.4.tar.gz", hash = "sha256:9d95cce8bb010597b3f2217155befe4708e0538d3548aa08d640ebf54e3f57cb"}, ] [package.dependencies] @@ -1031,6 +1031,17 @@ django = ["dj-database-url", "dj-email-url", "django-cache-url"] lint = ["flake8 (==4.0.1)", "flake8-bugbear (==21.9.2)", "mypy (==0.910)", "pre-commit (>=2.4,<3.0)"] tests = ["dj-database-url", "dj-email-url", "django-cache-url", "pytest"] +[[package]] +name = "et-xmlfile" +version = "2.0.0" +description = "An implementation of lxml.xmlfile for the standard library" +optional = false +python-versions = ">=3.8" +files = [ + {file = "et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa"}, + {file = "et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54"}, +] + [[package]] name = "exceptiongroup" version = "1.2.2" @@ -3561,6 +3572,20 @@ numpy = [ {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, ] +[[package]] +name = "openpyxl" +version = "3.1.5" +description = "A Python library to read/write Excel 2010 xlsx/xlsm files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"}, + {file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"}, +] + +[package.dependencies] +et-xmlfile = "*" + [[package]] name = "orjson" version = "3.10.11" @@ -3663,43 +3688,31 @@ python-versions = ">=3.9" files = [ {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"}, {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"}, - {file = "pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed"}, {file = "pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57"}, - {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42"}, {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f"}, {file = "pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645"}, {file = "pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039"}, {file = "pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd"}, - {file = "pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698"}, {file = "pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc"}, - {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3"}, {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32"}, {file = "pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5"}, {file = "pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9"}, {file = "pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4"}, - {file = "pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3"}, {file = "pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319"}, - {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8"}, {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a"}, {file = "pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13"}, {file = "pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015"}, {file = "pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28"}, - {file = "pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0"}, {file = "pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24"}, - {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659"}, {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb"}, {file = "pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = 
"sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d"}, {file = "pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468"}, {file = "pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18"}, - {file = "pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2"}, {file = "pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4"}, - {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d"}, {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a"}, {file = "pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39"}, {file = "pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30"}, - {file = "pandas-2.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c"}, {file = "pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c"}, - {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea"}, {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761"}, {file = "pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = 
"sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e"}, {file = "pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667"}, @@ -6457,13 +6470,13 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0, [[package]] name = "transformers" -version = "4.46.2" +version = "4.46.3" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" optional = false python-versions = ">=3.8.0" files = [ - {file = "transformers-4.46.2-py3-none-any.whl", hash = "sha256:c921f4406b78e6518c97b618c5acd1cf8a4f2315b6b727f4bf9e01496eef849c"}, - {file = "transformers-4.46.2.tar.gz", hash = "sha256:3d85410881e1c074be767877bf33c83231ec11529f274a6044ecb20c157ba14e"}, + {file = "transformers-4.46.3-py3-none-any.whl", hash = "sha256:a12ef6f52841fd190a3e5602145b542d03507222f2c64ebb7ee92e8788093aef"}, + {file = "transformers-4.46.3.tar.gz", hash = "sha256:8ee4b3ae943fe33e82afff8e837f4b052058b07ca9be3cb5b729ed31295f72cc"}, ] [package.dependencies] @@ -6609,6 +6622,17 @@ rich = ">=10.11.0" shellingham = ">=1.3.0" typing-extensions = ">=3.7.4.3" +[[package]] +name = "types-openpyxl" +version = "3.1.5.20241114" +description = "Typing stubs for openpyxl" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-openpyxl-3.1.5.20241114.tar.gz", hash = "sha256:caeb9aafed8a5ffabdc74f880b148d90375364a1cfe7915d5065c5d79f3fe6a2"}, + {file = "types_openpyxl-3.1.5.20241114-py3-none-any.whl", hash = "sha256:f2925f595b08f5aef1baa725c9ee40baaf51beb05d98ac150593d3bdd37b1029"}, +] + [[package]] name = "types-pytz" version = "2024.2.0.20241003" @@ -7212,4 +7236,4 @@ tesserocr = ["tesserocr"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "c7a2f4e30564c5bcd7ed96f203028f781a05ff2103698091616c8aff34ab3493" +content-hash = "1efd17010c7b811afb0c3a09b53d489cc6ac443b4edb6d6e5399b5d6b50d574d" diff --git a/pyproject.toml b/pyproject.toml index 088320e9..6a01764f 100644 
--- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ python-pptx = "^1.0.2" beautifulsoup4 = "^4.12.3" pandas = "^2.1.4" marko = "^2.1.2" +openpyxl = "^3.1.5" [tool.poetry.group.dev.dependencies] black = {extras = ["jupyter"], version = "^24.4.2"} @@ -65,6 +66,7 @@ pandas-stubs = "^2.1.4.231227" ipykernel = "^6.29.5" ipywidgets = "^8.1.5" nbqa = "^1.9.0" +types-openpyxl = "^3.1.5.20241114" [tool.poetry.group.docs.dependencies] mkdocs-material = "^9.5.40" diff --git a/tests/data/groundtruth/docling_v2/test-01.xlsx.itxt b/tests/data/groundtruth/docling_v2/test-01.xlsx.itxt new file mode 100644 index 00000000..cab5f63b --- /dev/null +++ b/tests/data/groundtruth/docling_v2/test-01.xlsx.itxt @@ -0,0 +1,10 @@ +item-0 at level 0: unspecified: group _root_ + item-1 at level 1: section: group sheet: Sheet1 + item-2 at level 2: table with [7x3] + item-3 at level 1: section: group sheet: Sheet2 + item-4 at level 2: table with [9x4] + item-5 at level 2: table with [5x3] + item-6 at level 2: table with [5x3] + item-7 at level 1: section: group sheet: Sheet3 + item-8 at level 2: table with [7x3] + item-9 at level 2: table with [7x3] \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/test-01.xlsx.json b/tests/data/groundtruth/docling_v2/test-01.xlsx.json new file mode 100644 index 00000000..9a9e0d52 --- /dev/null +++ b/tests/data/groundtruth/docling_v2/test-01.xlsx.json @@ -0,0 +1,3240 @@ +{ + "schema_name": "DoclingDocument", + "version": "1.0.0", + "name": "test-01", + "origin": { + "mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "binary_hash": 9744611217659152490, + "filename": "test-01.xlsx" + }, + "furniture": { + "self_ref": "#/furniture", + "children": [], + "name": "_root_", + "label": "unspecified" + }, + "body": { + "self_ref": "#/body", + "children": [ + { + "$ref": "#/groups/0" + }, + { + "$ref": "#/groups/1" + }, + { + "$ref": "#/groups/2" + } + ], + "name": "_root_", + "label": "unspecified" + }, + 
"groups": [ + { + "self_ref": "#/groups/0", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/tables/0" + } + ], + "name": "sheet: Sheet1", + "label": "section" + }, + { + "self_ref": "#/groups/1", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/tables/1" + }, + { + "$ref": "#/tables/2" + }, + { + "$ref": "#/tables/3" + } + ], + "name": "sheet: Sheet2", + "label": "section" + }, + { + "self_ref": "#/groups/2", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/tables/4" + }, + { + "$ref": "#/tables/5" + } + ], + "name": "sheet: Sheet3", + "label": "section" + } + ], + "texts": [], + "pictures": [], + "tables": [ + { + "self_ref": "#/tables/0", + "parent": { + "$ref": "#/groups/0" + }, + "children": [], + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "second ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "third", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + 
"start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": 
"4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "-3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "-6", + "column_header": false, + "row_header": false, + 
"row_section": false + } + ], + "num_rows": 7, + "num_cols": 3, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "second ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "third", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + 
"row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "0", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": 
false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "-3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "-6", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + }, + { + "self_ref": "#/tables/1", + "parent": { + "$ref": "#/groups/1" + }, + "children": [], + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "col-1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "col-2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + 
"start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "col-3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "col-4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + 
"start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + 
"text": "12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "16", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "15", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "20", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "18", + "column_header": false, + "row_header": 
false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "24", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "14", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "21", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "28", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "16", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "24", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + 
"col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "32", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 9, + "num_cols": 4, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "col-1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "col-2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "col-3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "col-4", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + 
"row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + 
"start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "16", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "15", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + 
"end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "20", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "18", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "24", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "14", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "21", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + 
"start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "28", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "16", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "24", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "32", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + }, + { + "self_ref": "#/tables/2", + "parent": { + "$ref": "#/groups/1" + }, + "children": [], + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "col-1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "col-2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + 
"text": "col-3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": 
false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 5, + "num_cols": 3, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "col-1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "col-2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "col-3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + 
"row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + } 
+ ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + }, + { + "self_ref": "#/tables/3", + "parent": { + "$ref": "#/groups/1" + }, + "children": [], + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "col-1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "col-2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "col-3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + 
"start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + 
"start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 5, + "num_cols": 3, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "col-1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "col-2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "col-3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + 
"end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + 
"start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + }, + { + "self_ref": "#/tables/4", + "parent": { + "$ref": "#/groups/2" + }, + "children": [], + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "header", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "second ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "third", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, 
+ "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, 
+ "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 7, + "num_cols": 3, + "grid": [ + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "header", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "header", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "second ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "third", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + 
"column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, 
+ "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + }, + { + "self_ref": "#/tables/5", + "parent": { + "$ref": "#/groups/2" + }, + "children": [], + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first (f)", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + 
"col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "header (f)", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "second ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "third", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, 
+ "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 7, + "num_cols": 3, + "grid": [ + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first (f)", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "header (f)", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + 
"start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "header (f)", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first (f)", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "second ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "third", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + 
"start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + 
"end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + } + ], + "key_value_items": [], + "pages": {} +} \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/test-01.xlsx.md b/tests/data/groundtruth/docling_v2/test-01.xlsx.md new file mode 100644 index 00000000..4a059c60 --- /dev/null +++ b/tests/data/groundtruth/docling_v2/test-01.xlsx.md @@ -0,0 +1,51 @@ +| first | second | third | +|----------|-----------|---------| +| 1 | 5 | 9 | +| 2 | 4 | 6 | +| 3 | 3 | 3 | +| 4 | 2 | 0 | +| 5 | 1 | -3 | +| 6 | 0 | -6 | + +| col-1 | col-2 | col-3 | col-4 | +|---------|---------|---------|---------| +| 1 | 2 | 3 | 4 | +| 2 | 4 | 6 | 8 | +| 3 | 6 | 9 | 12 | +| 4 | 8 | 12 | 16 | +| 5 | 10 | 15 | 20 | +| 6 | 12 | 18 | 24 | +| 7 | 14 | 21 | 28 | +| 8 | 16 | 24 | 32 | + +| col-1 | col-2 | col-3 | +|---------|---------|---------| +| 1 | 2 | 3 | +| 2 | 4 | 6 | +| 3 | 6 | 9 | +| 4 | 8 | 12 | + +| col-1 | col-2 | col-3 | +|---------|---------|---------| +| 1 | 2 | 3 | +| 2 | 4 | 6 | +| 3 | 6 | 9 | +| 4 | 8 | 12 | + +| first | header | header | +|----------|----------|----------| +| first | second | third | +| 1 | 2 | 3 | +| 3 | 4 | 5 | +| 3 | 6 | 7 | +| 8 | 9 | 9 | +| 10 | 9 | 9 | + +| first (f) | header (f) | header (f) | +|-------------|--------------|--------------| +| first (f) | second | third | +| 1 | 2 | 3 | +| 3 | 4 | 5 | +| 3 | 6 | 7 | +| 8 | 9 | 9 | 
import json
import os
from pathlib import Path

from docling.backend.msword_backend import MsWordDocumentBackend
from docling.datamodel.base_models import InputFormat
from docling.datamodel.document import (
    ConversionResult,
    InputDocument,
    SectionHeaderItem,
)
from docling.document_converter import DocumentConverter

# When True, verify_export() overwrites every groundtruth file with the
# current prediction instead of comparing against it.
GENERATE = False


def get_xlsx_paths():
    """Return the sorted ``*.xlsx`` files found below ``./tests/data/xlsx/``.

    The directory is relative to the current working directory.
    """
    directory = Path("./tests/data/xlsx/")

    # rglob() also descends into subdirectories.
    xlsx_files = sorted(directory.rglob("*.xlsx"))
    return xlsx_files


def get_converter():
    """Return a ``DocumentConverter`` built with ``allowed_formats=[InputFormat.XLSX]``."""
    converter = DocumentConverter(allowed_formats=[InputFormat.XLSX])

    return converter


def verify_export(pred_text: str, gtfile: str):
    """Check ``pred_text`` against the groundtruth stored in ``gtfile``.

    If ``gtfile`` does not exist, or the module-level ``GENERATE`` flag is
    set, ``pred_text`` is written to ``gtfile`` and the check succeeds.

    Args:
        pred_text: The freshly produced export.
        gtfile: Path of the groundtruth file to compare with (or to create).

    Returns:
        True when the groundtruth was (re)generated or matches ``pred_text``.

    Raises:
        AssertionError: If the stored groundtruth differs from ``pred_text``.
    """
    if not os.path.exists(gtfile) or GENERATE:
        # Explicit encoding keeps the fixtures independent of the platform
        # locale.
        with open(gtfile, "w", encoding="utf-8") as fw:
            fw.write(pred_text)

        return True

    with open(gtfile, "r", encoding="utf-8") as fr:
        true_text = fr.read()

    assert pred_text == true_text, f"prediction differs from groundtruth {gtfile}"
    return True


def test_e2e_xlsx_conversions():
    """Convert every XLSX fixture and compare its exports with groundtruth.

    For each workbook the markdown, indented-text and JSON exports are
    checked against ``<name>.md``, ``<name>.itxt`` and ``<name>.json`` in the
    ``groundtruth/docling_v2`` directory two levels above the workbook.
    """
    xlsx_paths = get_xlsx_paths()
    # Without this guard the loop below would be skipped and the test would
    # pass without checking anything (e.g. when run from the wrong directory).
    assert xlsx_paths, "no XLSX fixtures found under ./tests/data/xlsx/"

    converter = get_converter()

    for xlsx_path in xlsx_paths:
        gt_path = (
            xlsx_path.parent.parent / "groundtruth" / "docling_v2" / xlsx_path.name
        )

        conv_result: ConversionResult = converter.convert(xlsx_path)

        doc = conv_result.document

        pred_md: str = doc.export_to_markdown()
        assert verify_export(pred_md, str(gt_path) + ".md"), "export to md"

        pred_itxt: str = doc._export_to_indented_text(
            max_text_len=70, explicit_tables=False
        )
        assert verify_export(
            pred_itxt, str(gt_path) + ".itxt"
        ), "export to indented-text"

        pred_json: str = json.dumps(doc.export_to_dict(), indent=2)
        assert verify_export(pred_json, str(gt_path) + ".json"), "export to json"