From 5e4c6b4f8587c03495fe68f7b1a0235066c798a2 Mon Sep 17 00:00:00 2001 From: <> Date: Mon, 9 Dec 2024 07:31:13 +0000 Subject: [PATCH] Deployed c8ecdd9 with MkDocs version: 1.6.1 --- .nojekyll | 0 404.html | 1705 ++ api_reference/docling_document/index.html | 15161 ++++++++++++++++ api_reference/document_converter/index.html | 7193 ++++++++ api_reference/pipeline_options/index.html | 5697 ++++++ assets/_mkdocstrings.css | 143 + assets/docling_arch.png | Bin 0 -> 467220 bytes assets/docling_arch.pptx | Bin 0 -> 47700 bytes assets/docling_doc_hierarchy_1.png | Bin 0 -> 377808 bytes assets/docling_doc_hierarchy_2.png | Bin 0 -> 367053 bytes assets/docling_ecosystem.png | Bin 0 -> 238489 bytes assets/docling_ecosystem.pptx | Bin 0 -> 584282 bytes assets/docling_processing.png | Bin 0 -> 483500 bytes assets/images/favicon.png | Bin 0 -> 1870 bytes assets/javascripts/bundle.83f73b43.min.js | 16 + assets/javascripts/bundle.83f73b43.min.js.map | 7 + assets/javascripts/lunr/min/lunr.ar.min.js | 1 + assets/javascripts/lunr/min/lunr.da.min.js | 18 + assets/javascripts/lunr/min/lunr.de.min.js | 18 + assets/javascripts/lunr/min/lunr.du.min.js | 18 + assets/javascripts/lunr/min/lunr.el.min.js | 1 + assets/javascripts/lunr/min/lunr.es.min.js | 18 + assets/javascripts/lunr/min/lunr.fi.min.js | 18 + assets/javascripts/lunr/min/lunr.fr.min.js | 18 + assets/javascripts/lunr/min/lunr.he.min.js | 1 + assets/javascripts/lunr/min/lunr.hi.min.js | 1 + assets/javascripts/lunr/min/lunr.hu.min.js | 18 + assets/javascripts/lunr/min/lunr.hy.min.js | 1 + assets/javascripts/lunr/min/lunr.it.min.js | 18 + assets/javascripts/lunr/min/lunr.ja.min.js | 1 + assets/javascripts/lunr/min/lunr.jp.min.js | 1 + assets/javascripts/lunr/min/lunr.kn.min.js | 1 + assets/javascripts/lunr/min/lunr.ko.min.js | 1 + assets/javascripts/lunr/min/lunr.multi.min.js | 1 + assets/javascripts/lunr/min/lunr.nl.min.js | 18 + assets/javascripts/lunr/min/lunr.no.min.js | 18 + assets/javascripts/lunr/min/lunr.pt.min.js | 18 + 
assets/javascripts/lunr/min/lunr.ro.min.js | 18 + assets/javascripts/lunr/min/lunr.ru.min.js | 18 + assets/javascripts/lunr/min/lunr.sa.min.js | 1 + .../lunr/min/lunr.stemmer.support.min.js | 1 + assets/javascripts/lunr/min/lunr.sv.min.js | 18 + assets/javascripts/lunr/min/lunr.ta.min.js | 1 + assets/javascripts/lunr/min/lunr.te.min.js | 1 + assets/javascripts/lunr/min/lunr.th.min.js | 1 + assets/javascripts/lunr/min/lunr.tr.min.js | 18 + assets/javascripts/lunr/min/lunr.vi.min.js | 1 + assets/javascripts/lunr/min/lunr.zh.min.js | 1 + assets/javascripts/lunr/tinyseg.js | 206 + assets/javascripts/lunr/wordcut.js | 6708 +++++++ .../workers/search.6ce7567c.min.js | 42 + .../workers/search.6ce7567c.min.js.map | 7 + assets/logo.png | Bin 0 -> 264436 bytes assets/logo.svg | 116 + assets/stylesheets/main.6f8fc17f.min.css | 1 + assets/stylesheets/main.6f8fc17f.min.css.map | 1 + assets/stylesheets/palette.06af60db.min.css | 1 + .../stylesheets/palette.06af60db.min.css.map | 1 + cli/index.html | 1923 ++ concepts/architecture/index.html | 1797 ++ concepts/chunking/index.html | 1951 ++ concepts/docling_document/index.html | 1947 ++ concepts/index.html | 1776 ++ examples/batch_convert/index.html | 2703 +++ examples/custom_convert/index.html | 2688 +++ .../develop_picture_enrichment/index.html | 2609 +++ examples/export_figures/index.html | 2574 +++ examples/export_multimodal/index.html | 2610 +++ examples/export_tables/index.html | 2471 +++ examples/full_page_ocr/index.html | 2415 +++ examples/hybrid_chunking/index.html | 2935 +++ examples/hybrid_rag_qdrant/index.html | 2777 +++ examples/index.html | 1785 ++ examples/minimal/index.html | 2295 +++ examples/rag_langchain/index.html | 3006 +++ examples/rag_llamaindex/index.html | 3140 ++++ examples/run_md/index.html | 2393 +++ examples/run_with_formats/index.html | 2531 +++ faq/index.html | 2059 +++ index.html | 1836 ++ installation/index.html | 1959 ++ integrations/bee/index.html | 1789 ++ integrations/cloudera/index.html | 1788 
++ integrations/data_prep_kit/index.html | 1902 ++ integrations/docetl/index.html | 1789 ++ integrations/index.html | 1779 ++ integrations/instructlab/index.html | 1793 ++ integrations/kotaemon/index.html | 1789 ++ integrations/llamaindex/index.html | 1907 ++ integrations/prodigy/index.html | 1790 ++ integrations/rhel_ai/index.html | 1790 ++ integrations/spacy/index.html | 1790 ++ integrations/txtai/index.html | 1789 ++ objects.inv | Bin 0 -> 5746 bytes overrides/main.html | 7 + search/search_index.json | 1 + sitemap.xml | 163 + sitemap.xml.gz | Bin 0 -> 514 bytes stylesheets/extra.css | 3 + usage/index.html | 2174 +++ v2/index.html | 2191 +++ 101 files changed, 117706 insertions(+) create mode 100644 .nojekyll create mode 100644 404.html create mode 100644 api_reference/docling_document/index.html create mode 100644 api_reference/document_converter/index.html create mode 100644 api_reference/pipeline_options/index.html create mode 100644 assets/_mkdocstrings.css create mode 100644 assets/docling_arch.png create mode 100644 assets/docling_arch.pptx create mode 100644 assets/docling_doc_hierarchy_1.png create mode 100644 assets/docling_doc_hierarchy_2.png create mode 100644 assets/docling_ecosystem.png create mode 100644 assets/docling_ecosystem.pptx create mode 100644 assets/docling_processing.png create mode 100644 assets/images/favicon.png create mode 100644 assets/javascripts/bundle.83f73b43.min.js create mode 100644 assets/javascripts/bundle.83f73b43.min.js.map create mode 100644 assets/javascripts/lunr/min/lunr.ar.min.js create mode 100644 assets/javascripts/lunr/min/lunr.da.min.js create mode 100644 assets/javascripts/lunr/min/lunr.de.min.js create mode 100644 assets/javascripts/lunr/min/lunr.du.min.js create mode 100644 assets/javascripts/lunr/min/lunr.el.min.js create mode 100644 assets/javascripts/lunr/min/lunr.es.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fr.min.js create mode 
100644 assets/javascripts/lunr/min/lunr.he.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hu.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hy.min.js create mode 100644 assets/javascripts/lunr/min/lunr.it.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ja.min.js create mode 100644 assets/javascripts/lunr/min/lunr.jp.min.js create mode 100644 assets/javascripts/lunr/min/lunr.kn.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ko.min.js create mode 100644 assets/javascripts/lunr/min/lunr.multi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.nl.min.js create mode 100644 assets/javascripts/lunr/min/lunr.no.min.js create mode 100644 assets/javascripts/lunr/min/lunr.pt.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ro.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ru.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sa.min.js create mode 100644 assets/javascripts/lunr/min/lunr.stemmer.support.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sv.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ta.min.js create mode 100644 assets/javascripts/lunr/min/lunr.te.min.js create mode 100644 assets/javascripts/lunr/min/lunr.th.min.js create mode 100644 assets/javascripts/lunr/min/lunr.tr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.vi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.zh.min.js create mode 100644 assets/javascripts/lunr/tinyseg.js create mode 100644 assets/javascripts/lunr/wordcut.js create mode 100644 assets/javascripts/workers/search.6ce7567c.min.js create mode 100644 assets/javascripts/workers/search.6ce7567c.min.js.map create mode 100644 assets/logo.png create mode 100644 assets/logo.svg create mode 100644 assets/stylesheets/main.6f8fc17f.min.css create mode 100644 assets/stylesheets/main.6f8fc17f.min.css.map create mode 100644 assets/stylesheets/palette.06af60db.min.css 
create mode 100644 assets/stylesheets/palette.06af60db.min.css.map create mode 100644 cli/index.html create mode 100644 concepts/architecture/index.html create mode 100644 concepts/chunking/index.html create mode 100644 concepts/docling_document/index.html create mode 100644 concepts/index.html create mode 100644 examples/batch_convert/index.html create mode 100644 examples/custom_convert/index.html create mode 100644 examples/develop_picture_enrichment/index.html create mode 100644 examples/export_figures/index.html create mode 100644 examples/export_multimodal/index.html create mode 100644 examples/export_tables/index.html create mode 100644 examples/full_page_ocr/index.html create mode 100644 examples/hybrid_chunking/index.html create mode 100644 examples/hybrid_rag_qdrant/index.html create mode 100644 examples/index.html create mode 100644 examples/minimal/index.html create mode 100644 examples/rag_langchain/index.html create mode 100644 examples/rag_llamaindex/index.html create mode 100644 examples/run_md/index.html create mode 100644 examples/run_with_formats/index.html create mode 100644 faq/index.html create mode 100644 index.html create mode 100644 installation/index.html create mode 100644 integrations/bee/index.html create mode 100644 integrations/cloudera/index.html create mode 100644 integrations/data_prep_kit/index.html create mode 100644 integrations/docetl/index.html create mode 100644 integrations/index.html create mode 100644 integrations/instructlab/index.html create mode 100644 integrations/kotaemon/index.html create mode 100644 integrations/llamaindex/index.html create mode 100644 integrations/prodigy/index.html create mode 100644 integrations/rhel_ai/index.html create mode 100644 integrations/spacy/index.html create mode 100644 integrations/txtai/index.html create mode 100644 objects.inv create mode 100644 overrides/main.html create mode 100644 search/search_index.json create mode 100644 sitemap.xml create mode 100644 sitemap.xml.gz create 
mode 100644 stylesheets/extra.css create mode 100644 usage/index.html create mode 100644 v2/index.html diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 000000000..e69de29bb diff --git a/404.html b/404.html new file mode 100644 index 000000000..8734a7b34 --- /dev/null +++ b/404.html @@ -0,0 +1,1705 @@ + + + +
+ + + + + + + + + + + + + + +This is an automatically generated API reference of the DoclingDocument type.
+ + +
doc
+
+
+Package for models defined by the Document type.
+ + + + + + + +Classes:
+DoclingDocument
+ –
+ DoclingDocument.
+DocumentOrigin
+ –
+ FileSource.
+DocItem
+ –
+ DocItem.
+DocItemLabel
+ –
+ DocItemLabel.
+ProvenanceItem
+ –
+ ProvenanceItem.
+GroupItem
+ –
+ GroupItem.
+GroupLabel
+ –
+ GroupLabel.
+NodeItem
+ –
+ NodeItem.
+PageItem
+ –
+ PageItem.
+FloatingItem
+ –
+ FloatingItem.
+TextItem
+ –
+ TextItem.
+TableItem
+ –
+ TableItem.
+TableCell
+ –
+ TableCell.
+TableData
+ –
+ BaseTableData.
+TableCellLabel
+ –
+ TableCellLabel.
+KeyValueItem
+ –
+ KeyValueItem.
+SectionHeaderItem
+ –
+ SectionItem.
+PictureItem
+ –
+ PictureItem.
+ImageRef
+ –
+ ImageRef.
+PictureClassificationClass
+ –
+ PictureClassificationData.
+PictureClassificationData
+ –
+ PictureClassificationData.
+RefItem
+ –
+ RefItem.
+BoundingBox
+ –
+ BoundingBox.
+CoordOrigin
+ –
+ CoordOrigin.
+ImageRefMode
+ –
+ ImageRefMode.
+Size
+ –
+ Size.
+
DoclingDocument
+
+
+
+ Bases: BaseModel
DoclingDocument.
+ + + + + + + + + +Methods:
+add_group
+ –
+ add_group.
+add_heading
+ –
+ add_heading.
+add_list_item
+ –
+ add_list_item.
+add_page
+ –
+ add_page.
+add_picture
+ –
+ add_picture.
+add_table
+ –
+ add_table.
+add_text
+ –
+ add_text.
+add_title
+ –
+ add_title.
+check_version_is_compatible
+ –
+ Check if this document version is compatible with current version.
+export_to_dict
+ –
+ Export to dict.
+export_to_document_tokens
+ –
+ Exports the document content to a DocumentToken format.
+export_to_element_tree
+ –
+ Export_to_element_tree.
+export_to_html
+ –
+ Serialize to HTML.
+export_to_markdown
+ –
+ Serialize to Markdown.
+export_to_text
+ –
+ export_to_text.
+iterate_items
+ –
+ iterate_elements.
+load_from_json
+ –
+ load_from_json.
+num_pages
+ –
+ num_pages.
+print_element_tree
+ –
+ Print_element_tree.
+save_as_document_tokens
+ –
+ Save the document content to a DocumentToken format.
+save_as_html
+ –
+ Save to HTML.
+save_as_json
+ –
+ Save as json.
+save_as_markdown
+ –
+ Save to markdown.
+save_as_yaml
+ –
+ Save as yaml.
+validate_document
+ –
+ validate_document.
+validate_tree
+ –
+ validate_tree.
+Attributes:
+body
+ (GroupItem
)
+ –
+ furniture
+ (GroupItem
)
+ –
+ groups
+ (List[GroupItem]
)
+ –
+ key_value_items
+ (List[KeyValueItem]
)
+ –
+ name
+ (str
)
+ –
+ origin
+ (Optional[DocumentOrigin]
)
+ –
+ pages
+ (Dict[int, PageItem]
)
+ –
+ pictures
+ (List[PictureItem]
)
+ –
+ schema_name
+ (Literal['DoclingDocument']
)
+ –
+ tables
+ (List[TableItem]
)
+ –
+ texts
+ (List[Union[SectionHeaderItem, ListItem, TextItem]]
)
+ –
+ version
+ (Annotated[str, StringConstraints(pattern=VERSION_PATTERN, strict=True)]
)
+ –
+
body
+
+
+
furniture
+
+
+
name
+
+
+name: str
+
schema_name
+
+
+schema_name: Literal['DoclingDocument'] = 'DoclingDocument'
+
texts
+
+
+texts: List[
+ Union[SectionHeaderItem, ListItem, TextItem]
+] = []
+
version
+
+
+version: Annotated[
+ str,
+ StringConstraints(pattern=VERSION_PATTERN, strict=True),
+] = CURRENT_VERSION
+
add_group
+
+
+add_group(
+ label: Optional[GroupLabel] = None,
+ name: Optional[str] = None,
+ parent: Optional[GroupItem] = None,
+) -> GroupItem
+
add_group.
+:param label: Optional[GroupLabel]: (Default value = None) +:param name: Optional[str]: (Default value = None) +:param parent: Optional[GroupItem]: (Default value = None)
+ +
add_heading
+
+
+add_heading(
+ text: str,
+ orig: Optional[str] = None,
+ level: LevelNumber = 1,
+ prov: Optional[ProvenanceItem] = None,
+ parent: Optional[GroupItem] = None,
+)
+
add_heading.
+:param label: DocItemLabel: +:param text: str: +:param orig: Optional[str]: (Default value = None) +:param level: LevelNumber: (Default value = 1) +:param prov: Optional[ProvenanceItem]: (Default value = None) +:param parent: Optional[GroupItem]: (Default value = None)
+ +
add_list_item
+
+
+add_list_item(
+ text: str,
+ enumerated: bool = False,
+ marker: Optional[str] = None,
+ orig: Optional[str] = None,
+ prov: Optional[ProvenanceItem] = None,
+ parent: Optional[GroupItem] = None,
+)
+
add_list_item.
+:param label: str: +:param text: str: +:param orig: Optional[str]: (Default value = None) +:param prov: Optional[ProvenanceItem]: (Default value = None) +:param parent: Optional[GroupItem]: (Default value = None)
+ +
add_page
+
+
+add_page.
+:param page_no: int: +:param size: Size:
+ +
add_picture
+
+
+add_picture(
+ annotations: List[PictureDataType] = [],
+ image: Optional[ImageRef] = None,
+ caption: Optional[Union[TextItem, RefItem]] = None,
+ prov: Optional[ProvenanceItem] = None,
+ parent: Optional[GroupItem] = None,
+)
+
add_picture.
+:param annotations: List[PictureDataType]: (Default value = []) +:param image: Optional[ImageRef]: (Default value = None) +:param caption: Optional[Union[TextItem, RefItem]]: (Default value = None) +:param prov: Optional[ProvenanceItem]: (Default value = None) +:param parent: Optional[GroupItem]: (Default value = None)
+ +
add_table
+
+
+add_table(
+ data: TableData,
+ caption: Optional[Union[TextItem, RefItem]] = None,
+ prov: Optional[ProvenanceItem] = None,
+ parent: Optional[GroupItem] = None,
+)
+
add_table.
+:param data: TableData: +:param caption: Optional[Union[TextItem, RefItem]]: (Default value = None) +:param prov: Optional[ProvenanceItem]: (Default value = None) +:param parent: Optional[GroupItem]: (Default value = None)
+ +
add_text
+
+
+add_text(
+ label: DocItemLabel,
+ text: str,
+ orig: Optional[str] = None,
+ prov: Optional[ProvenanceItem] = None,
+ parent: Optional[GroupItem] = None,
+)
+
add_text.
+:param label: str: +:param text: str: +:param orig: Optional[str]: (Default value = None) +:param prov: Optional[ProvenanceItem]: (Default value = None) +:param parent: Optional[GroupItem]: (Default value = None)
+ +
add_title
+
+
+add_title(
+ text: str,
+ orig: Optional[str] = None,
+ prov: Optional[ProvenanceItem] = None,
+ parent: Optional[GroupItem] = None,
+)
+
add_title.
+:param text: str: +:param orig: Optional[str]: (Default value = None) +:param prov: Optional[ProvenanceItem]: (Default value = None) +:param parent: Optional[GroupItem]: (Default value = None)
+ +
check_version_is_compatible
+
+
+check_version_is_compatible(v: str) -> str
+
Check if this document version is compatible with current version.
+ +
export_to_dict
+
+
+export_to_dict(
+ mode: str = "json",
+ by_alias: bool = True,
+ exclude_none: bool = True,
+) -> Dict
+
Export to dict.
+ +
export_to_document_tokens
+
+
+export_to_document_tokens(
+ delim: str = "\n",
+ from_element: int = 0,
+ to_element: int = maxsize,
+ labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS,
+ xsize: int = 100,
+ ysize: int = 100,
+ add_location: bool = True,
+ add_content: bool = True,
+ add_page_index: bool = True,
+ add_table_cell_location: bool = False,
+ add_table_cell_label: bool = True,
+ add_table_cell_text: bool = True,
+ page_no: Optional[int] = None,
+ with_groups: bool = True,
+ newline: bool = True,
+) -> str
+
Exports the document content to a DocumentToken format.
+Operates on a slice of the document's body as defined through arguments +from_element and to_element; defaulting to the whole main_text.
+:param delim: str: (Default value = "\n") +:param from_element: int: (Default value = 0) +:param to_element: int: (Default value = sys.maxsize) +:param labels: set[DocItemLabel]: (Default value = DEFAULT_EXPORT_LABELS) +:param xsize: int: (Default value = 100) +:param ysize: int: (Default value = 100) +:param add_location: bool: (Default value = True) +:param add_content: bool: (Default value = True) +:param add_page_index: bool: (Default value = True) +:param add_table_cell_location: bool: (Default value = False) +:param add_table_cell_label: bool: (Default value = True) +:param add_table_cell_text: bool: (Default value = True) +:returns: The content of the document formatted as a DocTags string. +:rtype: str
+ +
export_to_element_tree
+
+
+export_to_element_tree() -> str
+
Export_to_element_tree.
+ +
export_to_html
+
+
+export_to_html(
+ from_element: int = 0,
+ to_element: int = maxsize,
+ labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS,
+ image_mode: ImageRefMode = PLACEHOLDER,
+ page_no: Optional[int] = None,
+ html_lang: str = "en",
+ html_head: str = _HTML_DEFAULT_HEAD,
+) -> str
+
Serialize to HTML.
+ +
export_to_markdown
+
+
+export_to_markdown(
+ delim: str = "\n",
+ from_element: int = 0,
+ to_element: int = maxsize,
+ labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS,
+ strict_text: bool = False,
+ image_placeholder: str = "<!-- image -->",
+ image_mode: ImageRefMode = PLACEHOLDER,
+ indent: int = 4,
+ text_width: int = -1,
+ page_no: Optional[int] = None,
+) -> str
+
Serialize to Markdown.
+Operates on a slice of the document's body as defined through arguments +from_element and to_element; defaulting to the whole document.
+:param delim: Delimiter to use when concatenating the various + Markdown parts. (Default value = "\n"). +:type delim: str = "\n" +:param from_element: Body slicing start index (inclusive). + (Default value = 0). +:type from_element: int = 0 +:param to_element: Body slicing stop index + (exclusive). (Default value = sys.maxsize). +:type to_element: int = sys.maxsize +:param labels: The set of document labels to include in the export. +:type labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS +:param strict_text: bool: Whether to only include the text content + of the document. (Default value = False). +:type strict_text: bool = False +:param image_placeholder: The placeholder to include to position + images in the markdown. (Default value = "<!-- image -->"). +:type image_placeholder: str = "<!-- image -->" +:param image_mode: The mode to use for including images in the + markdown. (Default value = ImageRefMode.PLACEHOLDER). +:type image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER +:param indent: The indent in spaces of the nested lists. + (Default value = 4). +:type indent: int = 4 +:returns: The exported Markdown representation. +:rtype: str
+ +
export_to_text
+
+
+export_to_text(
+ delim: str = "\n\n",
+ from_element: int = 0,
+ to_element: int = 1000000,
+ labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS,
+) -> str
+
export_to_text.
+ +
iterate_items
+
+
+iterate_items(
+ root: Optional[NodeItem] = None,
+ with_groups: bool = False,
+ traverse_pictures: bool = True,
+ page_no: Optional[int] = None,
+ _level: int = 0,
+) -> Iterable[Tuple[NodeItem, int]]
+
iterate_elements.
+:param root: Optional[NodeItem]: (Default value = None) +:param with_groups: bool: (Default value = False) +:param traverse_pictures: bool: (Default value = True) +:param page_no: Optional[int]: (Default value = None) +:param _level: int: Fixed parameter that carries through the node nesting level. (Default value = 0)
+ +
load_from_json
+
+
+load_from_json(filename: Path) -> DoclingDocument
+
load_from_json.
+:param filename: The filename to load a saved DoclingDocument from a .json. +:type filename: Path
+:returns: The loaded DoclingDocument. +:rtype: DoclingDocument
+ +
num_pages
+
+
+num_pages()
+
num_pages.
+ +
print_element_tree
+
+
+print_element_tree()
+
Print_element_tree.
+ +
save_as_document_tokens
+
+
+save_as_document_tokens(
+ filename: Path,
+ delim: str = "\n\n",
+ from_element: int = 0,
+ to_element: int = maxsize,
+ labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS,
+ xsize: int = 100,
+ ysize: int = 100,
+ add_location: bool = True,
+ add_content: bool = True,
+ add_page_index: bool = True,
+ add_table_cell_location: bool = False,
+ add_table_cell_label: bool = True,
+ add_table_cell_text: bool = True,
+ page_no: Optional[int] = None,
+ with_groups: bool = True,
+)
+
Save the document content to a DocumentToken format.
+ +
save_as_html
+
+
+save_as_html(
+ filename: Path,
+ artifacts_dir: Optional[Path] = None,
+ from_element: int = 0,
+ to_element: int = maxsize,
+ labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS,
+ image_mode: ImageRefMode = PLACEHOLDER,
+ page_no: Optional[int] = None,
+ html_lang: str = "en",
+ html_head: str = _HTML_DEFAULT_HEAD,
+)
+
Save to HTML.
+ +
save_as_json
+
+
+save_as_json(
+ filename: Path,
+ artifacts_dir: Optional[Path] = None,
+ image_mode: ImageRefMode = EMBEDDED,
+ indent: int = 2,
+)
+
Save as json.
+ +
save_as_markdown
+
+
+save_as_markdown(
+ filename: Path,
+ artifacts_dir: Optional[Path] = None,
+ delim: str = "\n",
+ from_element: int = 0,
+ to_element: int = maxsize,
+ labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS,
+ strict_text: bool = False,
+ image_placeholder: str = "<!-- image -->",
+ image_mode: ImageRefMode = PLACEHOLDER,
+ indent: int = 4,
+ text_width: int = -1,
+ page_no: Optional[int] = None,
+)
+
Save to markdown.
+ +
save_as_yaml
+
+
+save_as_yaml(
+ filename: Path,
+ artifacts_dir: Optional[Path] = None,
+ image_mode: ImageRefMode = EMBEDDED,
+ default_flow_style: bool = False,
+)
+
Save as yaml.
+ +
validate_document
+
+
+validate_document(d: DoclingDocument)
+
validate_document.
+ +
validate_tree
+
+
+validate_tree(root) -> bool
+
validate_tree.
+ +
DocumentOrigin
+
+
+
+ Bases: BaseModel
FileSource.
+ + + + + + + + + +Methods:
+parse_hex_string
+ –
+ parse_hex_string.
+validate_mimetype
+ –
+ validate_mimetype.
+Attributes:
+binary_hash
+ (Uint64
)
+ –
+ filename
+ (str
)
+ –
+ mimetype
+ (str
)
+ –
+ uri
+ (Optional[AnyUrl]
)
+ –
+
binary_hash
+
+
+binary_hash: Uint64
+
filename
+
+
+filename: str
+
mimetype
+
+
+mimetype: str
+
uri
+
+
+uri: Optional[AnyUrl] = None
+
parse_hex_string
+
+
+parse_hex_string(value)
+
parse_hex_string.
+ +
validate_mimetype
+
+
+validate_mimetype(v)
+
validate_mimetype.
+ +
DocItem
+
+
+
+ Bases: NodeItem
DocItem.
+ + + + + + + + + +Methods:
+get_image
+ –
+ Returns the image of this DocItem.
+get_location_tokens
+ –
+ Get the location string for the BaseCell.
+get_ref
+ –
+ get_ref.
+Attributes:
+children
+ (List[RefItem]
)
+ –
+ label
+ (DocItemLabel
)
+ –
+ model_config
+ –
+ parent
+ (Optional[RefItem]
)
+ –
+ prov
+ (List[ProvenanceItem]
)
+ –
+ self_ref
+ (str
)
+ –
+
model_config
+
+
+model_config = ConfigDict(extra='forbid')
+
self_ref
+
+
+self_ref: str = Field(pattern=_JSON_POINTER_REGEX)
+
get_image
+
+
+get_image(doc: DoclingDocument) -> Optional[Image]
+
Returns the image of this DocItem.
+The function returns None if this DocItem has no valid provenance or +if a valid image of the page containing this DocItem is not available +in doc.
+ +
get_location_tokens
+
+
+get_location_tokens(
+ doc: DoclingDocument,
+ new_line: str,
+ xsize: int = 100,
+ ysize: int = 100,
+ add_page_index: bool = True,
+) -> str
+
Get the location string for the BaseCell.
+ +
get_ref
+
+
+get_ref()
+
get_ref.
+ +
DocItemLabel
+
+
+
+ Bases: str
, Enum
DocItemLabel.
+ + + + + + + + + + + +Attributes:
+CAPTION
+ –
+ CHECKBOX_SELECTED
+ –
+ CHECKBOX_UNSELECTED
+ –
+ CODE
+ –
+ DOCUMENT_INDEX
+ –
+ FOOTNOTE
+ –
+ FORM
+ –
+ FORMULA
+ –
+ KEY_VALUE_REGION
+ –
+ LIST_ITEM
+ –
+ PAGE_FOOTER
+ –
+ PAGE_HEADER
+ –
+ PARAGRAPH
+ –
+ PICTURE
+ –
+ REFERENCE
+ –
+ SECTION_HEADER
+ –
+ TABLE
+ –
+ TEXT
+ –
+ TITLE
+ –
+
CAPTION
+
+
+CAPTION = 'caption'
+
CHECKBOX_SELECTED
+
+
+CHECKBOX_SELECTED = 'checkbox_selected'
+
CHECKBOX_UNSELECTED
+
+
+CHECKBOX_UNSELECTED = 'checkbox_unselected'
+
CODE
+
+
+CODE = 'code'
+
DOCUMENT_INDEX
+
+
+DOCUMENT_INDEX = 'document_index'
+
FOOTNOTE
+
+
+FOOTNOTE = 'footnote'
+
FORM
+
+
+FORM = 'form'
+
FORMULA
+
+
+FORMULA = 'formula'
+
KEY_VALUE_REGION
+
+
+KEY_VALUE_REGION = 'key_value_region'
+
LIST_ITEM
+
+
+LIST_ITEM = 'list_item'
+
PAGE_FOOTER
+
+
+PAGE_FOOTER = 'page_footer'
+
PAGE_HEADER
+
+
+PAGE_HEADER = 'page_header'
+
PARAGRAPH
+
+
+PARAGRAPH = 'paragraph'
+
PICTURE
+
+
+PICTURE = 'picture'
+
REFERENCE
+
+
+REFERENCE = 'reference'
+
SECTION_HEADER
+
+
+SECTION_HEADER = 'section_header'
+
TABLE
+
+
+TABLE = 'table'
+
TEXT
+
+
+TEXT = 'text'
+
TITLE
+
+
+TITLE = 'title'
+
ProvenanceItem
+
+
+
+ Bases: BaseModel
ProvenanceItem.
+ + + + + + + + + + + +Attributes:
+bbox
+ (BoundingBox
)
+ –
+ charspan
+ (Tuple[int, int]
)
+ –
+ page_no
+ (int
)
+ –
+
charspan
+
+
+charspan: Tuple[int, int]
+
page_no
+
+
+page_no: int
+
GroupItem
+
+
+
+ Bases: NodeItem
GroupItem.
+ + + + + + + + + +Methods:
+get_ref
+ –
+ get_ref.
+Attributes:
+children
+ (List[RefItem]
)
+ –
+ label
+ (GroupLabel
)
+ –
+ model_config
+ –
+ name
+ (str
)
+ –
+ parent
+ (Optional[RefItem]
)
+ –
+ self_ref
+ (str
)
+ –
+
model_config
+
+
+model_config = ConfigDict(extra='forbid')
+
name
+
+
+name: str = 'group'
+
self_ref
+
+
+self_ref: str = Field(pattern=_JSON_POINTER_REGEX)
+
get_ref
+
+
+get_ref()
+
get_ref.
+ +
GroupLabel
+
+
+
+ Bases: str
, Enum
GroupLabel.
+ + + + + + + + + + + +Attributes:
+CHAPTER
+ –
+ LIST
+ –
+ ORDERED_LIST
+ –
+ SECTION
+ –
+ SHEET
+ –
+ SLIDE
+ –
+ UNSPECIFIED
+ –
+
CHAPTER
+
+
+CHAPTER = 'chapter'
+
LIST
+
+
+LIST = 'list'
+
ORDERED_LIST
+
+
+ORDERED_LIST = 'ordered_list'
+
SECTION
+
+
+SECTION = 'section'
+
SHEET
+
+
+SHEET = 'sheet'
+
SLIDE
+
+
+SLIDE = 'slide'
+
UNSPECIFIED
+
+
+UNSPECIFIED = 'unspecified'
+
NodeItem
+
+
+
+ Bases: BaseModel
NodeItem.
+ + + + + + + + + +Methods:
+get_ref
+ –
+ get_ref.
+Attributes:
+children
+ (List[RefItem]
)
+ –
+ model_config
+ –
+ parent
+ (Optional[RefItem]
)
+ –
+ self_ref
+ (str
)
+ –
+
model_config
+
+
+model_config = ConfigDict(extra='forbid')
+
self_ref
+
+
+self_ref: str = Field(pattern=_JSON_POINTER_REGEX)
+
get_ref
+
+
+get_ref()
+
get_ref.
+ +
PageItem
+
+
+
+ Bases: BaseModel
PageItem.
+ + + + + + + + + + + +Attributes:
+ + + + + + + +
FloatingItem
+
+
+
+ Bases: DocItem
FloatingItem.
+ + + + + + + + + +Methods:
+caption_text
+ –
+ Computes the caption as a single text.
+get_image
+ –
+ Returns the image corresponding to this FloatingItem.
+get_location_tokens
+ –
+ Get the location string for the BaseCell.
+get_ref
+ –
+ get_ref.
+Attributes:
+captions
+ (List[RefItem]
)
+ –
+ children
+ (List[RefItem]
)
+ –
+ footnotes
+ (List[RefItem]
)
+ –
+ image
+ (Optional[ImageRef]
)
+ –
+ label
+ (DocItemLabel
)
+ –
+ model_config
+ –
+ parent
+ (Optional[RefItem]
)
+ –
+ prov
+ (List[ProvenanceItem]
)
+ –
+ references
+ (List[RefItem]
)
+ –
+ self_ref
+ (str
)
+ –
+
model_config
+
+
+model_config = ConfigDict(extra='forbid')
+
self_ref
+
+
+self_ref: str = Field(pattern=_JSON_POINTER_REGEX)
+
caption_text
+
+
+caption_text(doc: DoclingDocument) -> str
+
Computes the caption as a single text.
+ +
get_image
+
+
+get_image(doc: DoclingDocument) -> Optional[Image]
+
Returns the image corresponding to this FloatingItem.
+This function returns the PIL image from self.image if one is available. +Otherwise, it uses DocItem.get_image to get an image of this FloatingItem.
+In particular, when self.image is None, the function returns None if this +FloatingItem has no valid provenance or the doc does not contain a valid image +for the required page.
+ +
get_location_tokens
+
+
+get_location_tokens(
+ doc: DoclingDocument,
+ new_line: str,
+ xsize: int = 100,
+ ysize: int = 100,
+ add_page_index: bool = True,
+) -> str
+
Get the location string for the BaseCell.
+ +
get_ref
+
+
+get_ref()
+
get_ref.
+ +
TextItem
+
+
+
+ Bases: DocItem
TextItem.
+ + + + + + + + + +Methods:
+export_to_document_tokens
+ –
+ Export text element to document tokens format.
+get_image
+ –
+ Returns the image of this DocItem.
+get_location_tokens
+ –
+ Get the location string for the BaseCell.
+get_ref
+ –
+ get_ref.
+Attributes:
+children
+ (List[RefItem]
)
+ –
+ label
+ (DocItemLabel
)
+ –
+ model_config
+ –
+ orig
+ (str
)
+ –
+ parent
+ (Optional[RefItem]
)
+ –
+ prov
+ (List[ProvenanceItem]
)
+ –
+ self_ref
+ (str
)
+ –
+ text
+ (str
)
+ –
+
model_config
+
+
+model_config = ConfigDict(extra='forbid')
+
orig
+
+
+orig: str
+
self_ref
+
+
+self_ref: str = Field(pattern=_JSON_POINTER_REGEX)
+
text
+
+
+text: str
+
export_to_document_tokens
+
+
+export_to_document_tokens(
+ doc: DoclingDocument,
+ new_line: str = "\n",
+ xsize: int = 100,
+ ysize: int = 100,
+ add_location: bool = True,
+ add_content: bool = True,
+ add_page_index: bool = True,
+)
+
Export text element to document tokens format.
+:param doc: "DoclingDocument": +:param new_line: str: (Default value = "\n") +:param xsize: int: (Default value = 100) +:param ysize: int: (Default value = 100) +:param add_location: bool: (Default value = True) +:param add_content: bool: (Default value = True) +:param add_page_index: bool: (Default value = True)
+ +
get_image
+
+
+get_image(doc: DoclingDocument) -> Optional[Image]
+
Returns the image of this DocItem.
+The function returns None if this DocItem has no valid provenance or +if a valid image of the page containing this DocItem is not available +in doc.
+ +
get_location_tokens
+
+
+get_location_tokens(
+ doc: DoclingDocument,
+ new_line: str,
+ xsize: int = 100,
+ ysize: int = 100,
+ add_page_index: bool = True,
+) -> str
+
Get the location string for the BaseCell.
+ +
get_ref
+
+
+get_ref()
+
get_ref.
+ +
TableItem
+
+
+
+ Bases: FloatingItem
TableItem.
+ + + + + + + + + +Methods:
+caption_text
+ –
+ Computes the caption as a single text.
+export_to_dataframe
+ –
+ Export the table as a Pandas DataFrame.
+export_to_document_tokens
+ –
+ Export table to document tokens format.
+export_to_html
+ –
+ Export the table as html.
+export_to_markdown
+ –
+ Export the table as markdown.
+export_to_otsl
+ –
+ Export the table as OTSL.
+get_image
+ –
+ Returns the image corresponding to this FloatingItem.
+get_location_tokens
+ –
+ Get the location string for the BaseCell.
+get_ref
+ –
+ get_ref.
+Attributes:
+captions
+ (List[RefItem]
)
+ –
+ children
+ (List[RefItem]
)
+ –
+ data
+ (TableData
)
+ –
+ footnotes
+ (List[RefItem]
)
+ –
+ image
+ (Optional[ImageRef]
)
+ –
+ label
+ (Literal[TABLE]
)
+ –
+ model_config
+ –
+ parent
+ (Optional[RefItem]
)
+ –
+ prov
+ (List[ProvenanceItem]
)
+ –
+ references
+ (List[RefItem]
)
+ –
+ self_ref
+ (str
)
+ –
+
model_config
+
+
+model_config = ConfigDict(extra='forbid')
+
self_ref
+
+
+self_ref: str = Field(pattern=_JSON_POINTER_REGEX)
+
caption_text
+
+
+caption_text(doc: DoclingDocument) -> str
+
Computes the caption as a single text.
+ +
export_to_dataframe
+
+
+export_to_dataframe() -> DataFrame
+
Export the table as a Pandas DataFrame.
+ +
export_to_document_tokens
+
+
+export_to_document_tokens(
+ doc: DoclingDocument,
+ new_line: str = "\n",
+ xsize: int = 100,
+ ysize: int = 100,
+ add_location: bool = True,
+ add_caption: bool = True,
+ add_content: bool = True,
+ add_cell_location: bool = True,
+ add_cell_label: bool = True,
+ add_cell_text: bool = True,
+ add_page_index: bool = True,
+)
+
Export table to document tokens format.
+:param doc: "DoclingDocument": +:param new_line: str: (Default value = "\n") +:param xsize: int: (Default value = 100) +:param ysize: int: (Default value = 100) +:param add_location: bool: (Default value = True) +:param add_caption: bool: (Default value = True) +:param add_content: bool: (Default value = True) +:param add_cell_location: bool: (Default value = True) +:param add_cell_label: bool: (Default value = True) +:param add_cell_text: bool: (Default value = True) +:param add_page_index: bool: (Default value = True)
+ +
export_to_html
+
+
+export_to_html(
+ doc: Optional[DoclingDocument] = None,
+ add_caption: bool = True,
+) -> str
+
Export the table as html.
+ +
export_to_markdown
+
+
+export_to_markdown() -> str
+
Export the table as markdown.
+ +
export_to_otsl
+
+
+export_to_otsl(
+ doc: DoclingDocument,
+ add_cell_location: bool = True,
+ add_cell_text: bool = True,
+ xsize: int = 100,
+ ysize: int = 100,
+) -> str
+
Export the table as OTSL.
+ +
get_image
+
+
+get_image(doc: DoclingDocument) -> Optional[Image]
+
Returns the image corresponding to this FloatingItem.
+This function returns the PIL image from self.image if one is available. +Otherwise, it uses DocItem.get_image to get an image of this FloatingItem.
+In particular, when self.image is None, the function returns None if this +FloatingItem has no valid provenance or the doc does not contain a valid image +for the required page.
+ +
get_location_tokens
+
+
+get_location_tokens(
+ doc: DoclingDocument,
+ new_line: str,
+ xsize: int = 100,
+ ysize: int = 100,
+ add_page_index: bool = True,
+) -> str
+
Get the location string for the BaseCell.
+ +
get_ref
+
+
+get_ref()
+
get_ref.
+ +
TableCell
+
+
+
+ Bases: BaseModel
TableCell.
+ + + + + + + + + +Methods:
+from_dict_format
+ –
+ from_dict_format.
+Attributes:
+bbox
+ (Optional[BoundingBox]
)
+ –
+ col_span
+ (int
)
+ –
+ column_header
+ (bool
)
+ –
+ end_col_offset_idx
+ (int
)
+ –
+ end_row_offset_idx
+ (int
)
+ –
+ row_header
+ (bool
)
+ –
+ row_section
+ (bool
)
+ –
+ row_span
+ (int
)
+ –
+ start_col_offset_idx
+ (int
)
+ –
+ start_row_offset_idx
+ (int
)
+ –
+ text
+ (str
)
+ –
+
col_span
+
+
+col_span: int = 1
+
column_header
+
+
+column_header: bool = False
+
end_col_offset_idx
+
+
+end_col_offset_idx: int
+
end_row_offset_idx
+
+
+end_row_offset_idx: int
+
row_header
+
+
+row_header: bool = False
+
row_section
+
+
+row_section: bool = False
+
row_span
+
+
+row_span: int = 1
+
start_col_offset_idx
+
+
+start_col_offset_idx: int
+
start_row_offset_idx
+
+
+start_row_offset_idx: int
+
text
+
+
+text: str
+
from_dict_format
+
+
+from_dict_format(data: Any) -> Any
+
from_dict_format.
+ +
TableData
+
+
+
+ Bases: BaseModel
BaseTableData.
+ + + + + + + + + + + +Attributes:
+ + + + + + + +
TableCellLabel
+
+
+
+ Bases: str
, Enum
TableCellLabel.
+ + + + + + + + + + + +Attributes:
+BODY
+ –
+ COLUMN_HEADER
+ –
+ ROW_HEADER
+ –
+ ROW_SECTION
+ –
+
BODY
+
+
+BODY = 'body'
+
COLUMN_HEADER
+
+
+COLUMN_HEADER = 'col_header'
+
ROW_HEADER
+
+
+ROW_HEADER = 'row_header'
+
ROW_SECTION
+
+
+ROW_SECTION = 'row_section'
+
KeyValueItem
+
+
+
+ Bases: DocItem
KeyValueItem.
+ + + + + + + + + +Methods:
+get_image
+ –
+ Returns the image of this DocItem.
+get_location_tokens
+ –
+ Get the location string for the BaseCell.
+get_ref
+ –
+ get_ref.
+Attributes:
+children
+ (List[RefItem]
)
+ –
+ label
+ (DocItemLabel
)
+ –
+ model_config
+ –
+ parent
+ (Optional[RefItem]
)
+ –
+ prov
+ (List[ProvenanceItem]
)
+ –
+ self_ref
+ (str
)
+ –
+
model_config
+
+
+model_config = ConfigDict(extra='forbid')
+
self_ref
+
+
+self_ref: str = Field(pattern=_JSON_POINTER_REGEX)
+
get_image
+
+
+get_image(doc: DoclingDocument) -> Optional[Image]
+
Returns the image of this DocItem.
+The function returns None if this DocItem has no valid provenance or +if a valid image of the page containing this DocItem is not available +in doc.
+ +
get_location_tokens
+
+
+get_location_tokens(
+ doc: DoclingDocument,
+ new_line: str,
+ xsize: int = 100,
+ ysize: int = 100,
+ add_page_index: bool = True,
+) -> str
+
Get the location string for the BaseCell.
+ +
get_ref
+
+
+get_ref()
+
get_ref.
+ +
SectionHeaderItem
+
+
+
+ Bases: TextItem
SectionItem.
+ + + + + + + + + +Methods:
+export_to_document_tokens
+ –
+ Export text element to document tokens format.
+get_image
+ –
+ Returns the image of this DocItem.
+get_location_tokens
+ –
+ Get the location string for the BaseCell.
+get_ref
+ –
+ get_ref.
+Attributes:
+children
+ (List[RefItem]
)
+ –
+ label
+ (Literal[SECTION_HEADER]
)
+ –
+ level
+ (LevelNumber
)
+ –
+ model_config
+ –
+ orig
+ (str
)
+ –
+ parent
+ (Optional[RefItem]
)
+ –
+ prov
+ (List[ProvenanceItem]
)
+ –
+ self_ref
+ (str
)
+ –
+ text
+ (str
)
+ –
+
level
+
+
+level: LevelNumber
+
model_config
+
+
+model_config = ConfigDict(extra='forbid')
+
orig
+
+
+orig: str
+
self_ref
+
+
+self_ref: str = Field(pattern=_JSON_POINTER_REGEX)
+
text
+
+
+text: str
+
export_to_document_tokens
+
+
+export_to_document_tokens(
+ doc: DoclingDocument,
+ new_line: str = "\n",
+ xsize: int = 100,
+ ysize: int = 100,
+ add_location: bool = True,
+ add_content: bool = True,
+ add_page_index: bool = True,
+)
+
Export text element to document tokens format.
+:param doc: "DoclingDocument": +:param new_line: str: (Default value = "\n") +:param xsize: int: (Default value = 100) +:param ysize: int: (Default value = 100) +:param add_location: bool: (Default value = True) +:param add_content: bool: (Default value = True) +:param add_page_index: bool: (Default value = True)
+ +
get_image
+
+
+get_image(doc: DoclingDocument) -> Optional[Image]
+
Returns the image of this DocItem.
+The function returns None if this DocItem has no valid provenance or +if a valid image of the page containing this DocItem is not available +in doc.
+ +
get_location_tokens
+
+
+get_location_tokens(
+ doc: DoclingDocument,
+ new_line: str,
+ xsize: int = 100,
+ ysize: int = 100,
+ add_page_index: bool = True,
+) -> str
+
Get the location string for the BaseCell.
+ +
get_ref
+
+
+get_ref()
+
get_ref.
+ +
PictureItem
+
+
+
+ Bases: FloatingItem
PictureItem.
+ + + + + + + + + +Methods:
+caption_text
+ –
+ Computes the caption as a single text.
+export_to_document_tokens
+ –
+ Export picture to document tokens format.
+export_to_html
+ –
+ Export picture to HTML format.
+export_to_markdown
+ –
+ Export picture to Markdown format.
+get_image
+ –
+ Returns the image corresponding to this FloatingItem.
+get_location_tokens
+ –
+ Get the location string for the BaseCell.
+get_ref
+ –
+ get_ref.
+Attributes:
+annotations
+ (List[PictureDataType]
)
+ –
+ captions
+ (List[RefItem]
)
+ –
+ children
+ (List[RefItem]
)
+ –
+ footnotes
+ (List[RefItem]
)
+ –
+ image
+ (Optional[ImageRef]
)
+ –
+ label
+ (Literal[PICTURE]
)
+ –
+ model_config
+ –
+ parent
+ (Optional[RefItem]
)
+ –
+ prov
+ (List[ProvenanceItem]
)
+ –
+ references
+ (List[RefItem]
)
+ –
+ self_ref
+ (str
)
+ –
+
annotations
+
+
+annotations: List[PictureDataType] = []
+
model_config
+
+
+model_config = ConfigDict(extra='forbid')
+
self_ref
+
+
+self_ref: str = Field(pattern=_JSON_POINTER_REGEX)
+
caption_text
+
+
+caption_text(doc: DoclingDocument) -> str
+
Computes the caption as a single text.
+ +
export_to_document_tokens
+
+
+export_to_document_tokens(
+ doc: DoclingDocument,
+ new_line: str = "\n",
+ xsize: int = 100,
+ ysize: int = 100,
+ add_location: bool = True,
+ add_caption: bool = True,
+ add_content: bool = True,
+ add_page_index: bool = True,
+)
+
Export picture to document tokens format.
+:param doc: "DoclingDocument": +:param new_line: str: (Default value = "\n") +:param xsize: int: (Default value = 100) +:param ysize: int: (Default value = 100) +:param add_location: bool: (Default value = True) +:param add_caption: bool: (Default value = True) +:param add_content: bool: (Default value = True) (not used at the moment) +:param add_page_index: bool: (Default value = True)
+ +
export_to_html
+
+
+export_to_html(
+ doc: DoclingDocument,
+ add_caption: bool = True,
+ image_mode: ImageRefMode = PLACEHOLDER,
+) -> str
+
Export picture to HTML format.
+ +
export_to_markdown
+
+
+export_to_markdown(
+ doc: DoclingDocument,
+ add_caption: bool = True,
+ image_mode: ImageRefMode = EMBEDDED,
+ image_placeholder: str = "<!-- image -->",
+) -> str
+
Export picture to Markdown format.
+ +
get_image
+
+
+get_image(doc: DoclingDocument) -> Optional[Image]
+
Returns the image corresponding to this FloatingItem.
+This function returns the PIL image from self.image if one is available. +Otherwise, it uses DocItem.get_image to get an image of this FloatingItem.
+In particular, when self.image is None, the function returns None if this +FloatingItem has no valid provenance or the doc does not contain a valid image +for the required page.
+ +
get_location_tokens
+
+
+get_location_tokens(
+ doc: DoclingDocument,
+ new_line: str,
+ xsize: int = 100,
+ ysize: int = 100,
+ add_page_index: bool = True,
+) -> str
+
Get the location string for the BaseCell.
+ +
get_ref
+
+
+get_ref()
+
get_ref.
+ +
ImageRef
+
+
+
+ Bases: BaseModel
ImageRef.
+ + + + + + + + + +Methods:
+from_pil
+ –
+ Construct ImageRef from a PIL Image.
+validate_mimetype
+ –
+ validate_mimetype.
+Attributes:
+dpi
+ (int
)
+ –
+ mimetype
+ (str
)
+ –
+ pil_image
+ (Optional[Image]
)
+ –
+ Return the PIL Image.
+size
+ (Size
)
+ –
+ uri
+ (Union[AnyUrl, Path]
)
+ –
+
dpi
+
+
+dpi: int
+
mimetype
+
+
+mimetype: str
+
pil_image
+
+
+pil_image: Optional[Image]
+
Return the PIL Image.
+
uri
+
+
+uri: Union[AnyUrl, Path]
+
from_pil
+
+
+from_pil(image: Image, dpi: int) -> Self
+
Construct ImageRef from a PIL Image.
+ +
validate_mimetype
+
+
+validate_mimetype(v)
+
validate_mimetype.
+ +
PictureClassificationClass
+
+
+
+ Bases: BaseModel
PictureClassificationClass.
+ + + + + + + + + + + +Attributes:
+class_name
+ (str
)
+ –
+ confidence
+ (float
)
+ –
+
class_name
+
+
+class_name: str
+
confidence
+
+
+confidence: float
+
PictureClassificationData
+
+
+
+ Bases: BasePictureData
PictureClassificationData.
+ + + + + + + + + + + +Attributes:
+kind
+ (Literal['classification']
)
+ –
+ predicted_classes
+ (List[PictureClassificationClass]
)
+ –
+ provenance
+ (str
)
+ –
+
kind
+
+
+kind: Literal['classification'] = 'classification'
+
provenance
+
+
+provenance: str
+
RefItem
+
+
+
+ Bases: BaseModel
RefItem.
+ + + + + + + + + +Methods:
+ + + + + +Attributes:
+cref
+ (str
)
+ –
+ model_config
+ –
+
cref
+
+
+cref: str = Field(alias="$ref", pattern=_JSON_POINTER_REGEX)
+
model_config
+
+
+model_config = ConfigDict(populate_by_name=True)
+
get_ref
+
+
+get_ref()
+
get_ref.
+ +
BoundingBox
+
+
+
+ Bases: BaseModel
BoundingBox.
+ + + + + + + + + +Methods:
+area
+ –
+ area.
+as_tuple
+ –
+ as_tuple.
+from_tuple
+ –
+ from_tuple.
+intersection_area_with
+ –
+ intersection_area_with.
+normalized
+ –
+ normalized.
+scaled
+ –
+ scaled.
+to_bottom_left_origin
+ –
+ to_bottom_left_origin.
+to_top_left_origin
+ –
+ to_top_left_origin.
+Attributes:
+b
+ (float
)
+ –
+ coord_origin
+ (CoordOrigin
)
+ –
+ height
+ –
+ height.
+l
+ (float
)
+ –
+ r
+ (float
)
+ –
+ t
+ (float
)
+ –
+ width
+ –
+ width.
+
b
+
+
+b: float
+
height
+
+
+height
+
height.
+
l
+
+
+l: float
+
r
+
+
+r: float
+
t
+
+
+t: float
+
width
+
+
+width
+
width.
+
area
+
+
+area() -> float
+
area.
+ +
as_tuple
+
+
+as_tuple()
+
as_tuple.
+ +
from_tuple
+
+
+from_tuple(coord: Tuple[float, ...], origin: CoordOrigin)
+
from_tuple.
+:param coord: Tuple[float, ...]: +:param origin: CoordOrigin:
+ +
intersection_area_with
+
+
+intersection_area_with(other: BoundingBox) -> float
+
intersection_area_with.
+:param other: "BoundingBox":
+ +
normalized
+
+
+normalized(page_size: Size) -> BoundingBox
+
normalized.
+:param page_size: Size:
+ +
scaled
+
+
+scaled(scale: float) -> BoundingBox
+
scaled.
+:param scale: float:
+ +
to_bottom_left_origin
+
+
+to_bottom_left_origin(page_height) -> BoundingBox
+
to_bottom_left_origin.
+:param page_height:
+ +
to_top_left_origin
+
+
+to_top_left_origin(page_height)
+
to_top_left_origin.
+:param page_height:
+ +
CoordOrigin
+
+
+
+ Bases: str
, Enum
CoordOrigin.
+ + + + + + + + + + + +Attributes:
+BOTTOMLEFT
+ –
+ TOPLEFT
+ –
+
BOTTOMLEFT
+
+
+BOTTOMLEFT = 'BOTTOMLEFT'
+
TOPLEFT
+
+
+TOPLEFT = 'TOPLEFT'
+
ImageRefMode
+
+
+
+ Bases: str
, Enum
ImageRefMode.
+ + + + + + + + + + + +Attributes:
+EMBEDDED
+ –
+ PLACEHOLDER
+ –
+ REFERENCED
+ –
+
EMBEDDED
+
+
+EMBEDDED = 'embedded'
+
PLACEHOLDER
+
+
+PLACEHOLDER = 'placeholder'
+
REFERENCED
+
+
+REFERENCED = 'referenced'
+
Size
+
+
+
+ Bases: BaseModel
Size.
+ + + + + + + + + +Methods:
+as_tuple
+ –
+ as_tuple.
+Attributes:
+ + + + + +
height
+
+
+height: float = 0.0
+
width
+
+
+width: float = 0.0
+
as_tuple
+
+
+as_tuple()
+
as_tuple.
+ +This is an automatically generated API reference of the main components of Docling.
+ + +
document_converter
+
+
+Classes:
+DocumentConverter
+ –
+ ConversionResult
+ –
+ ConversionStatus
+ –
+ FormatOption
+ –
+ InputFormat
+ –
+ PdfFormatOption
+ –
+ ImageFormatOption
+ –
+ StandardPdfPipeline
+ –
+ WordFormatOption
+ –
+ PowerpointFormatOption
+ –
+ MarkdownFormatOption
+ –
+ AsciiDocFormatOption
+ –
+ HTMLFormatOption
+ –
+ SimplePipeline
+ –
+ SimpleModelPipeline.
+
DocumentConverter
+
+
+DocumentConverter(
+ allowed_formats: Optional[List[InputFormat]] = None,
+ format_options: Optional[
+ Dict[InputFormat, FormatOption]
+ ] = None,
+)
+
Methods:
+convert
+ –
+ convert_all
+ –
+ initialize_pipeline
+ –
+ Initialize the conversion pipeline for the selected format.
+Attributes:
+allowed_formats
+ –
+ format_to_options
+ –
+ initialized_pipelines
+ (Dict[Type[BasePipeline], BasePipeline]
)
+ –
+
allowed_formats
+
+
+
+ instance-attribute
+
+
+allowed_formats = (
+ allowed_formats
+ if allowed_formats is not None
+ else [e for e in InputFormat]
+)
+
format_to_options
+
+
+
+ instance-attribute
+
+
+format_to_options = {format: _get_default_option(format=format) if (custom_option := get(format)) is None else custom_option for format in allowed_formats}
+
initialized_pipelines
+
+
+
+ instance-attribute
+
+
+initialized_pipelines: Dict[
+ Type[BasePipeline], BasePipeline
+] = {}
+
convert
+
+
+convert(
+ source: Union[Path, str, DocumentStream],
+ raises_on_error: bool = True,
+ max_num_pages: int = maxsize,
+ max_file_size: int = maxsize,
+) -> ConversionResult
+
convert_all
+
+
+convert_all(
+ source: Iterable[Union[Path, str, DocumentStream]],
+ raises_on_error: bool = True,
+ max_num_pages: int = maxsize,
+ max_file_size: int = maxsize,
+) -> Iterator[ConversionResult]
+
initialize_pipeline
+
+
+initialize_pipeline(format: InputFormat)
+
Initialize the conversion pipeline for the selected format.
+ +
ConversionResult
+
+
+
+ Bases: BaseModel
Attributes:
+assembled
+ (AssembledUnit
)
+ –
+ document
+ (DoclingDocument
)
+ –
+ errors
+ (List[ErrorItem]
)
+ –
+ input
+ (InputDocument
)
+ –
+ legacy_document
+ –
+ pages
+ (List[Page]
)
+ –
+ status
+ (ConversionStatus
)
+ –
+ timings
+ (Dict[str, ProfilingItem]
)
+ –
+
assembled
+
+
+
+ class-attribute
+ instance-attribute
+
+
+assembled: AssembledUnit = AssembledUnit()
+
document
+
+
+
+ class-attribute
+ instance-attribute
+
+
+document: DoclingDocument = _EMPTY_DOCLING_DOC
+
errors
+
+
+
+ class-attribute
+ instance-attribute
+
+
+errors: List[ErrorItem] = []
+
input
+
+
+
+ instance-attribute
+
+
+input: InputDocument
+
legacy_document
+
+
+
+ property
+
+
+legacy_document
+
pages
+
+
+
+ class-attribute
+ instance-attribute
+
+
+pages: List[Page] = []
+
status
+
+
+
+ class-attribute
+ instance-attribute
+
+
+status: ConversionStatus = PENDING
+
timings
+
+
+
+ class-attribute
+ instance-attribute
+
+
+timings: Dict[str, ProfilingItem] = {}
+
ConversionStatus
+
+
+
+ Bases: str
, Enum
Attributes:
+FAILURE
+ –
+ PARTIAL_SUCCESS
+ –
+ PENDING
+ –
+ SKIPPED
+ –
+ STARTED
+ –
+ SUCCESS
+ –
+
FAILURE
+
+
+
+ class-attribute
+ instance-attribute
+
+
+FAILURE = auto()
+
PARTIAL_SUCCESS
+
+
+
+ class-attribute
+ instance-attribute
+
+
+PARTIAL_SUCCESS = auto()
+
PENDING
+
+
+
+ class-attribute
+ instance-attribute
+
+
+PENDING = auto()
+
SKIPPED
+
+
+
+ class-attribute
+ instance-attribute
+
+
+SKIPPED = auto()
+
STARTED
+
+
+
+ class-attribute
+ instance-attribute
+
+
+STARTED = auto()
+
SUCCESS
+
+
+
+ class-attribute
+ instance-attribute
+
+
+SUCCESS = auto()
+
FormatOption
+
+
+
+ Bases: BaseModel
Methods:
+set_optional_field_default
+ –
+ Attributes:
+backend
+ (Type[AbstractDocumentBackend]
)
+ –
+ model_config
+ –
+ pipeline_cls
+ (Type[BasePipeline]
)
+ –
+ pipeline_options
+ (Optional[PipelineOptions]
)
+ –
+
backend
+
+
+
+ instance-attribute
+
+
+backend: Type[AbstractDocumentBackend]
+
model_config
+
+
+
+ class-attribute
+ instance-attribute
+
+
+model_config = ConfigDict(arbitrary_types_allowed=True)
+
pipeline_cls
+
+
+
+ instance-attribute
+
+
+pipeline_cls: Type[BasePipeline]
+
pipeline_options
+
+
+
+ class-attribute
+ instance-attribute
+
+
+pipeline_options: Optional[PipelineOptions] = None
+
set_optional_field_default
+
+
+set_optional_field_default() -> FormatOption
+
InputFormat
+
+
+
+ Bases: str
, Enum
Attributes:
+ASCIIDOC
+ –
+ DOCX
+ –
+ HTML
+ –
+ IMAGE
+ –
+ MD
+ –
+ PDF
+ –
+ PPTX
+ –
+ XLSX
+ –
+
ASCIIDOC
+
+
+
+ class-attribute
+ instance-attribute
+
+
+ASCIIDOC = 'asciidoc'
+
DOCX
+
+
+
+ class-attribute
+ instance-attribute
+
+
+DOCX = 'docx'
+
HTML
+
+
+
+ class-attribute
+ instance-attribute
+
+
+HTML = 'html'
+
IMAGE
+
+
+
+ class-attribute
+ instance-attribute
+
+
+IMAGE = 'image'
+
MD
+
+
+
+ class-attribute
+ instance-attribute
+
+
+MD = 'md'
+
PDF
+
+
+
+ class-attribute
+ instance-attribute
+
+
+PDF = 'pdf'
+
PPTX
+
+
+
+ class-attribute
+ instance-attribute
+
+
+PPTX = 'pptx'
+
XLSX
+
+
+
+ class-attribute
+ instance-attribute
+
+
+XLSX = 'xlsx'
+
PdfFormatOption
+
+
+
+ Bases: FormatOption
Methods:
+set_optional_field_default
+ –
+ Attributes:
+backend
+ (Type[AbstractDocumentBackend]
)
+ –
+ model_config
+ –
+ pipeline_cls
+ (Type
)
+ –
+ pipeline_options
+ (Optional[PipelineOptions]
)
+ –
+
backend
+
+
+
+ class-attribute
+ instance-attribute
+
+
+backend: Type[AbstractDocumentBackend] = (
+ DoclingParseDocumentBackend
+)
+
model_config
+
+
+
+ class-attribute
+ instance-attribute
+
+
+model_config = ConfigDict(arbitrary_types_allowed=True)
+
pipeline_cls
+
+
+
+ class-attribute
+ instance-attribute
+
+
+pipeline_cls: Type = StandardPdfPipeline
+
pipeline_options
+
+
+
+ class-attribute
+ instance-attribute
+
+
+pipeline_options: Optional[PipelineOptions] = None
+
set_optional_field_default
+
+
+set_optional_field_default() -> FormatOption
+
ImageFormatOption
+
+
+
+ Bases: FormatOption
Methods:
+set_optional_field_default
+ –
+ Attributes:
+backend
+ (Type[AbstractDocumentBackend]
)
+ –
+ model_config
+ –
+ pipeline_cls
+ (Type
)
+ –
+ pipeline_options
+ (Optional[PipelineOptions]
)
+ –
+
backend
+
+
+
+ class-attribute
+ instance-attribute
+
+
+backend: Type[AbstractDocumentBackend] = (
+ DoclingParseDocumentBackend
+)
+
model_config
+
+
+
+ class-attribute
+ instance-attribute
+
+
+model_config = ConfigDict(arbitrary_types_allowed=True)
+
pipeline_cls
+
+
+
+ class-attribute
+ instance-attribute
+
+
+pipeline_cls: Type = StandardPdfPipeline
+
pipeline_options
+
+
+
+ class-attribute
+ instance-attribute
+
+
+pipeline_options: Optional[PipelineOptions] = None
+
set_optional_field_default
+
+
+set_optional_field_default() -> FormatOption
+
StandardPdfPipeline
+
+
+StandardPdfPipeline(pipeline_options: PdfPipelineOptions)
+
+ Bases: PaginatedPipeline
Methods:
+download_models_hf
+ –
+ execute
+ –
+ get_default_options
+ –
+ get_ocr_model
+ –
+ initialize_page
+ –
+ is_backend_supported
+ –
+ Attributes:
+artifacts_path
+ –
+ build_pipe
+ –
+ enrichment_pipe
+ –
+ glm_model
+ –
+ pipeline_options
+ (PdfPipelineOptions
)
+ –
+
artifacts_path
+
+
+
+ instance-attribute
+
+
+artifacts_path = download_models_hf()
+
build_pipe
+
+
+
+ instance-attribute
+
+
+build_pipe = [
+ PagePreprocessingModel(
+ options=PagePreprocessingOptions(
+ images_scale=images_scale
+ )
+ ),
+ ocr_model,
+ LayoutModel(
+ artifacts_path=artifacts_path / _layout_model_path
+ ),
+ TableStructureModel(
+ enabled=do_table_structure,
+ artifacts_path=artifacts_path / _table_model_path,
+ options=table_structure_options,
+ ),
+ PageAssembleModel(
+ options=PageAssembleOptions(keep_images=keep_images)
+ ),
+]
+
enrichment_pipe
+
+
+
+ instance-attribute
+
+
+enrichment_pipe = []
+
glm_model
+
+
+
+ instance-attribute
+
+
+glm_model = GlmModel(options=GlmOptions())
+
pipeline_options
+
+
+
+ instance-attribute
+
+
+pipeline_options: PdfPipelineOptions
+
download_models_hf
+
+
+
+ staticmethod
+
+
+download_models_hf(
+ local_dir: Optional[Path] = None, force: bool = False
+) -> Path
+
execute
+
+
+execute(
+ in_doc: InputDocument, raises_on_error: bool
+) -> ConversionResult
+
get_default_options
+
+
+
+ classmethod
+
+
+get_default_options() -> PdfPipelineOptions
+
get_ocr_model
+
+
+get_ocr_model() -> Optional[BaseOcrModel]
+
initialize_page
+
+
+initialize_page(
+ conv_res: ConversionResult, page: Page
+) -> Page
+
is_backend_supported
+
+
+
+ classmethod
+
+
+is_backend_supported(backend: AbstractDocumentBackend)
+
WordFormatOption
+
+
+
+ Bases: FormatOption
Methods:
+set_optional_field_default
+ –
+ Attributes:
+backend
+ (Type[AbstractDocumentBackend]
)
+ –
+ model_config
+ –
+ pipeline_cls
+ (Type
)
+ –
+ pipeline_options
+ (Optional[PipelineOptions]
)
+ –
+
backend
+
+
+
+ class-attribute
+ instance-attribute
+
+
+backend: Type[AbstractDocumentBackend] = (
+ MsWordDocumentBackend
+)
+
model_config
+
+
+
+ class-attribute
+ instance-attribute
+
+
+model_config = ConfigDict(arbitrary_types_allowed=True)
+
pipeline_cls
+
+
+
+ class-attribute
+ instance-attribute
+
+
+pipeline_cls: Type = SimplePipeline
+
pipeline_options
+
+
+
+ class-attribute
+ instance-attribute
+
+
+pipeline_options: Optional[PipelineOptions] = None
+
set_optional_field_default
+
+
+set_optional_field_default() -> FormatOption
+
PowerpointFormatOption
+
+
+
+ Bases: FormatOption
Methods:
+set_optional_field_default
+ –
+ Attributes:
+backend
+ (Type[AbstractDocumentBackend]
)
+ –
+ model_config
+ –
+ pipeline_cls
+ (Type
)
+ –
+ pipeline_options
+ (Optional[PipelineOptions]
)
+ –
+
backend
+
+
+
+ class-attribute
+ instance-attribute
+
+
+backend: Type[AbstractDocumentBackend] = (
+ MsPowerpointDocumentBackend
+)
+
model_config
+
+
+
+ class-attribute
+ instance-attribute
+
+
+model_config = ConfigDict(arbitrary_types_allowed=True)
+
pipeline_cls
+
+
+
+ class-attribute
+ instance-attribute
+
+
+pipeline_cls: Type = SimplePipeline
+
pipeline_options
+
+
+
+ class-attribute
+ instance-attribute
+
+
+pipeline_options: Optional[PipelineOptions] = None
+
set_optional_field_default
+
+
+set_optional_field_default() -> FormatOption
+
MarkdownFormatOption
+
+
+
+ Bases: FormatOption
Methods:
+set_optional_field_default
+ –
+ Attributes:
+backend
+ (Type[AbstractDocumentBackend]
)
+ –
+ model_config
+ –
+ pipeline_cls
+ (Type
)
+ –
+ pipeline_options
+ (Optional[PipelineOptions]
)
+ –
+
backend
+
+
+
+ class-attribute
+ instance-attribute
+
+
+backend: Type[AbstractDocumentBackend] = (
+ MarkdownDocumentBackend
+)
+
model_config
+
+
+
+ class-attribute
+ instance-attribute
+
+
+model_config = ConfigDict(arbitrary_types_allowed=True)
+
pipeline_cls
+
+
+
+ class-attribute
+ instance-attribute
+
+
+pipeline_cls: Type = SimplePipeline
+
pipeline_options
+
+
+
+ class-attribute
+ instance-attribute
+
+
+pipeline_options: Optional[PipelineOptions] = None
+
set_optional_field_default
+
+
+set_optional_field_default() -> FormatOption
+
AsciiDocFormatOption
+
+
+
+ Bases: FormatOption
Methods:
+set_optional_field_default
+ –
+ Attributes:
+backend
+ (Type[AbstractDocumentBackend]
)
+ –
+ model_config
+ –
+ pipeline_cls
+ (Type
)
+ –
+ pipeline_options
+ (Optional[PipelineOptions]
)
+ –
+
backend
+
+
+
+ class-attribute
+ instance-attribute
+
+
+backend: Type[AbstractDocumentBackend] = AsciiDocBackend
+
model_config
+
+
+
+ class-attribute
+ instance-attribute
+
+
+model_config = ConfigDict(arbitrary_types_allowed=True)
+
pipeline_cls
+
+
+
+ class-attribute
+ instance-attribute
+
+
+pipeline_cls: Type = SimplePipeline
+
pipeline_options
+
+
+
+ class-attribute
+ instance-attribute
+
+
+pipeline_options: Optional[PipelineOptions] = None
+
set_optional_field_default
+
+
+set_optional_field_default() -> FormatOption
+
HTMLFormatOption
+
+
+
+ Bases: FormatOption
Methods:
+set_optional_field_default
+ –
+ Attributes:
+backend
+ (Type[AbstractDocumentBackend]
)
+ –
+ model_config
+ –
+ pipeline_cls
+ (Type
)
+ –
+ pipeline_options
+ (Optional[PipelineOptions]
)
+ –
+
backend
+
+
+
+ class-attribute
+ instance-attribute
+
+
+backend: Type[AbstractDocumentBackend] = HTMLDocumentBackend
+
model_config
+
+
+
+ class-attribute
+ instance-attribute
+
+
+model_config = ConfigDict(arbitrary_types_allowed=True)
+
pipeline_cls
+
+
+
+ class-attribute
+ instance-attribute
+
+
+pipeline_cls: Type = SimplePipeline
+
pipeline_options
+
+
+
+ class-attribute
+ instance-attribute
+
+
+pipeline_options: Optional[PipelineOptions] = None
+
set_optional_field_default
+
+
+set_optional_field_default() -> FormatOption
+
SimplePipeline
+
+
+SimplePipeline(pipeline_options: PipelineOptions)
+
+ Bases: BasePipeline
SimpleModelPipeline.
+This class is used at the moment for formats / backends +which produce straight DoclingDocument output.
+ + + + + + + + + +Methods:
+execute
+ –
+ get_default_options
+ –
+ is_backend_supported
+ –
+ Attributes:
+build_pipe
+ (List[Callable]
)
+ –
+ enrichment_pipe
+ (List[BaseEnrichmentModel]
)
+ –
+ pipeline_options
+ –
+
build_pipe
+
+
+
+ instance-attribute
+
+
+build_pipe: List[Callable] = []
+
enrichment_pipe
+
+
+
+ instance-attribute
+
+
+enrichment_pipe: List[BaseEnrichmentModel] = []
+
pipeline_options
+
+
+
+ instance-attribute
+
+
+pipeline_options = pipeline_options
+
execute
+
+
+execute(
+ in_doc: InputDocument, raises_on_error: bool
+) -> ConversionResult
+
get_default_options
+
+
+
+ classmethod
+
+
+get_default_options() -> PipelineOptions
+
is_backend_supported
+
+
+
+ classmethod
+
+
+is_backend_supported(backend: AbstractDocumentBackend)
+
Pipeline options allow you to customize the execution of the models during the conversion pipeline.
+This includes options for the OCR engines, the table model as well as enrichment options which
+can be enabled with do_xyz = True
.
This is an automatically generated API reference of all the pipeline options available in Docling.
+ + +
pipeline_options
+
+
+Classes:
+EasyOcrOptions
+ –
+ Options for the EasyOCR engine.
+OcrMacOptions
+ –
+ Options for the Mac OCR engine.
+OcrOptions
+ –
+ OCR options.
+PdfPipelineOptions
+ –
+ Options for the PDF pipeline.
+PipelineOptions
+ –
+ Base pipeline options.
+RapidOcrOptions
+ –
+ Options for the RapidOCR engine.
+TableFormerMode
+ –
+ Modes for the TableFormer model.
+TableStructureOptions
+ –
+ Options for the table structure.
+TesseractCliOcrOptions
+ –
+ Options for the TesseractCli engine.
+TesseractOcrOptions
+ –
+ Options for the Tesseract engine.
+
EasyOcrOptions
+
+
+
+ Bases: OcrOptions
Options for the EasyOCR engine.
+ + + + + + + + + + + +Attributes:
+bitmap_area_threshold
+ (float
)
+ –
+ download_enabled
+ (bool
)
+ –
+ force_full_page_ocr
+ (bool
)
+ –
+ kind
+ (Literal['easyocr']
)
+ –
+ lang
+ (List[str]
)
+ –
+ model_config
+ –
+ model_storage_directory
+ (Optional[str]
)
+ –
+ use_gpu
+ (bool
)
+ –
+
bitmap_area_threshold
+
+
+
+ class-attribute
+ instance-attribute
+
+
+bitmap_area_threshold: float = 0.05
+
download_enabled
+
+
+
+ class-attribute
+ instance-attribute
+
+
+download_enabled: bool = True
+
force_full_page_ocr
+
+
+
+ class-attribute
+ instance-attribute
+
+
+force_full_page_ocr: bool = False
+
kind
+
+
+
+ class-attribute
+ instance-attribute
+
+
+kind: Literal['easyocr'] = 'easyocr'
+
lang
+
+
+
+ class-attribute
+ instance-attribute
+
+
+lang: List[str] = ['fr', 'de', 'es', 'en']
+
model_config
+
+
+
+ class-attribute
+ instance-attribute
+
+
+model_config = ConfigDict(
+ extra="forbid", protected_namespaces=()
+)
+
model_storage_directory
+
+
+
+ class-attribute
+ instance-attribute
+
+
+model_storage_directory: Optional[str] = None
+
use_gpu
+
+
+
+ class-attribute
+ instance-attribute
+
+
+use_gpu: bool = True
+
OcrMacOptions
+
+
+
+ Bases: OcrOptions
Options for the Mac OCR engine.
+ + + + + + + + + + + +Attributes:
+bitmap_area_threshold
+ (float
)
+ –
+ force_full_page_ocr
+ (bool
)
+ –
+ framework
+ (str
)
+ –
+ kind
+ (Literal['ocrmac']
)
+ –
+ lang
+ (List[str]
)
+ –
+ model_config
+ –
+ recognition
+ (str
)
+ –
+
bitmap_area_threshold
+
+
+
+ class-attribute
+ instance-attribute
+
+
+bitmap_area_threshold: float = 0.05
+
force_full_page_ocr
+
+
+
+ class-attribute
+ instance-attribute
+
+
+force_full_page_ocr: bool = False
+
framework
+
+
+
+ class-attribute
+ instance-attribute
+
+
+framework: str = 'vision'
+
kind
+
+
+
+ class-attribute
+ instance-attribute
+
+
+kind: Literal['ocrmac'] = 'ocrmac'
+
lang
+
+
+
+ class-attribute
+ instance-attribute
+
+
+lang: List[str] = ['fr-FR', 'de-DE', 'es-ES', 'en-US']
+
model_config
+
+
+
+ class-attribute
+ instance-attribute
+
+
+model_config = ConfigDict(extra='forbid')
+
recognition
+
+
+
+ class-attribute
+ instance-attribute
+
+
+recognition: str = 'accurate'
+
OcrOptions
+
+
+
+ Bases: BaseModel
OCR options.
+ + + + + + + + + + + +Attributes:
+bitmap_area_threshold
+ (float
)
+ –
+ force_full_page_ocr
+ (bool
)
+ –
+ kind
+ (str
)
+ –
+ lang
+ (List[str]
)
+ –
+
bitmap_area_threshold
+
+
+
+ class-attribute
+ instance-attribute
+
+
+bitmap_area_threshold: float = 0.05
+
force_full_page_ocr
+
+
+
+ class-attribute
+ instance-attribute
+
+
+force_full_page_ocr: bool = False
+
kind
+
+
+
+ instance-attribute
+
+
+kind: str
+
lang
+
+
+
+ instance-attribute
+
+
+lang: List[str]
+
PdfPipelineOptions
+
+
+
+ Bases: PipelineOptions
Options for the PDF pipeline.
+ + + + + + + + + + + +Attributes:
+artifacts_path
+ (Optional[Union[Path, str]]
)
+ –
+ create_legacy_output
+ (bool
)
+ –
+ do_ocr
+ (bool
)
+ –
+ do_table_structure
+ (bool
)
+ –
+ generate_page_images
+ (bool
)
+ –
+ generate_picture_images
+ (bool
)
+ –
+ generate_table_images
+ (bool
)
+ –
+ images_scale
+ (float
)
+ –
+ ocr_options
+ (Union[EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, OcrMacOptions]
)
+ –
+ table_structure_options
+ (TableStructureOptions
)
+ –
+
artifacts_path
+
+
+
+ class-attribute
+ instance-attribute
+
+
+artifacts_path: Optional[Union[Path, str]] = None
+
create_legacy_output
+
+
+
+ class-attribute
+ instance-attribute
+
+
+create_legacy_output: bool = True
+
do_ocr
+
+
+
+ class-attribute
+ instance-attribute
+
+
+do_ocr: bool = True
+
do_table_structure
+
+
+
+ class-attribute
+ instance-attribute
+
+
+do_table_structure: bool = True
+
generate_page_images
+
+
+
+ class-attribute
+ instance-attribute
+
+
+generate_page_images: bool = False
+
generate_picture_images
+
+
+
+ class-attribute
+ instance-attribute
+
+
+generate_picture_images: bool = False
+
generate_table_images
+
+
+
+ class-attribute
+ instance-attribute
+
+
+generate_table_images: bool = Field(
+ default=False,
+ deprecated="Field `generate_table_images` is deprecated. To obtain table images, set `PdfPipelineOptions.generate_page_images = True` before conversion and then use the `TableItem.get_image` function.",
+)
+
images_scale
+
+
+
+ class-attribute
+ instance-attribute
+
+
+images_scale: float = 1.0
+
ocr_options
+
+
+
+ class-attribute
+ instance-attribute
+
+
+ocr_options: Union[
+ EasyOcrOptions,
+ TesseractCliOcrOptions,
+ TesseractOcrOptions,
+ OcrMacOptions,
+] = Field(EasyOcrOptions(), discriminator="kind")
+
table_structure_options
+
+
+
+ class-attribute
+ instance-attribute
+
+
+table_structure_options: TableStructureOptions = (
+ TableStructureOptions()
+)
+
PipelineOptions
+
+
+
+ Bases: BaseModel
Base pipeline options.
+Attributes:
+ create_legacy_output (bool) –
+
create_legacy_output
+
+
+
+ class-attribute
+ instance-attribute
+
+
+create_legacy_output: bool = True
+
RapidOcrOptions
+
+
+
+ Bases: OcrOptions
Options for the RapidOCR engine.
+Attributes:
+ bitmap_area_threshold (float) –
+ cls_model_path (Optional[str]) –
+ det_model_path (Optional[str]) –
+ force_full_page_ocr (bool) –
+ kind (Literal['rapidocr']) –
+ lang (List[str]) –
+ model_config –
+ print_verbose (bool) –
+ rec_model_path (Optional[str]) –
+ text_score (float) –
+ use_cls (Optional[bool]) –
+ use_det (Optional[bool]) –
+ use_rec (Optional[bool]) –
+
bitmap_area_threshold
+
+
+
+ class-attribute
+ instance-attribute
+
+
+bitmap_area_threshold: float = 0.05
+
cls_model_path
+
+
+
+ class-attribute
+ instance-attribute
+
+
+cls_model_path: Optional[str] = None
+
det_model_path
+
+
+
+ class-attribute
+ instance-attribute
+
+
+det_model_path: Optional[str] = None
+
force_full_page_ocr
+
+
+
+ class-attribute
+ instance-attribute
+
+
+force_full_page_ocr: bool = False
+
kind
+
+
+
+ class-attribute
+ instance-attribute
+
+
+kind: Literal['rapidocr'] = 'rapidocr'
+
lang
+
+
+
+ class-attribute
+ instance-attribute
+
+
+lang: List[str] = ['english', 'chinese']
+
model_config
+
+
+
+ class-attribute
+ instance-attribute
+
+
+model_config = ConfigDict(extra='forbid')
+
print_verbose
+
+
+
+ class-attribute
+ instance-attribute
+
+
+print_verbose: bool = False
+
rec_model_path
+
+
+
+ class-attribute
+ instance-attribute
+
+
+rec_model_path: Optional[str] = None
+
text_score
+
+
+
+ class-attribute
+ instance-attribute
+
+
+text_score: float = 0.5
+
use_cls
+
+
+
+ class-attribute
+ instance-attribute
+
+
+use_cls: Optional[bool] = None
+
use_det
+
+
+
+ class-attribute
+ instance-attribute
+
+
+use_det: Optional[bool] = None
+
use_rec
+
+
+
+ class-attribute
+ instance-attribute
+
+
+use_rec: Optional[bool] = None
+
TableFormerMode
+
+
+
+ Bases: str, Enum
Modes for the TableFormer model.
+Attributes:
+ ACCURATE –
+ FAST –
+
ACCURATE
+
+
+
+ class-attribute
+ instance-attribute
+
+
+ACCURATE = 'accurate'
+
FAST
+
+
+
+ class-attribute
+ instance-attribute
+
+
+FAST = 'fast'
+
TableStructureOptions
+
+
+
+ Bases: BaseModel
Options for the table structure.
+Attributes:
+ do_cell_matching (bool) –
+ mode (TableFormerMode) –
+
do_cell_matching
+
+
+
+ class-attribute
+ instance-attribute
+
+
+do_cell_matching: bool = True
+
mode
+
+
+
+ class-attribute
+ instance-attribute
+
+
+mode: TableFormerMode = TableFormerMode.FAST
+
TesseractCliOcrOptions
+
+
+
+ Bases: OcrOptions
Options for the TesseractCli engine.
+Attributes:
+ bitmap_area_threshold (float) –
+ force_full_page_ocr (bool) –
+ kind (Literal['tesseract']) –
+ lang (List[str]) –
+ model_config –
+ path (Optional[str]) –
+ tesseract_cmd (str) –
+
bitmap_area_threshold
+
+
+
+ class-attribute
+ instance-attribute
+
+
+bitmap_area_threshold: float = 0.05
+
force_full_page_ocr
+
+
+
+ class-attribute
+ instance-attribute
+
+
+force_full_page_ocr: bool = False
+
kind
+
+
+
+ class-attribute
+ instance-attribute
+
+
+kind: Literal['tesseract'] = 'tesseract'
+
lang
+
+
+
+ class-attribute
+ instance-attribute
+
+
+lang: List[str] = ['fra', 'deu', 'spa', 'eng']
+
model_config
+
+
+
+ class-attribute
+ instance-attribute
+
+
+model_config = ConfigDict(extra='forbid')
+
path
+
+
+
+ class-attribute
+ instance-attribute
+
+
+path: Optional[str] = None
+
tesseract_cmd
+
+
+
+ class-attribute
+ instance-attribute
+
+
+tesseract_cmd: str = 'tesseract'
+
TesseractOcrOptions
+
+
+
+ Bases: OcrOptions
Options for the Tesseract engine.
+Attributes:
+ bitmap_area_threshold (float) –
+ force_full_page_ocr (bool) –
+ kind (Literal['tesserocr']) –
+ lang (List[str]) –
+ model_config –
+ path (Optional[str]) –
+
bitmap_area_threshold
+
+
+
+ class-attribute
+ instance-attribute
+
+
+bitmap_area_threshold: float = 0.05
+
force_full_page_ocr
+
+
+
+ class-attribute
+ instance-attribute
+
+
+force_full_page_ocr: bool = False
+
kind
+
+
+
+ class-attribute
+ instance-attribute
+
+
+kind: Literal['tesserocr'] = 'tesserocr'
+
lang
+
+
+
+ class-attribute
+ instance-attribute
+
+
+lang: List[str] = ['fra', 'deu', 'spa', 'eng']
+
model_config
+
+
+
+ class-attribute
+ instance-attribute
+
+
+model_config = ConfigDict(extra='forbid')
+
path
+
+
+
+ class-attribute
+ instance-attribute
+
+
+path: Optional[str] = None
+
Wc9y%aotz$Wo$f;n zjvI*X<99OfeWhcAJMO&8tM9S6oQZQ+Y1?`67AcY`D!yj~GZ|4Uk3ze;%&^~oNgVxY z%FQ0OYmJTLE1p5CvK(udgT2ljv%4aKEXhGm6DmS+dHWG}1}yxava~&ffAw_H!O0VV zz7dfsF2L2VWZ=pjqTUzCx|74`2-wD@-{Ir%xTAP4v6Stnu~PnMx2$GYd2-shFE!<1 z00_$*zWdldKT-+|>2u60EmctZXace)ZW{b1kbqOr+(d9YWp z9CyE0W4~FI3~PXk1!}7Xy7GyUYt6U2tn2w|Zb_RjMHq)9?oZ;AD}WfN>+U8!jDbYF zr$H3M&jvX&6{~Y``Ezh+^<$lpYvO?(xvM`tGPD=b9Qu2A2FY2T*hnSq668DGd$fH@ z@!r5!mjrsb=WmB;V*~hcCCnxr+U;{3+w2^0Ws QiIPPZSs&Eedwpce7H3t%I;944fXtHQqLEoG2xEX8FKN@`^IIP9he7M*} z4Dp+X{BG&?2Vj}x%qibtEb$P=LhZyWr|7YXLx5N34Nk-2N2t+4tuE;6uK*WvG>}&w zeO%2xF(U-o91#>t`;RZDb+;ol&g48qh<4-Sjqz5dm9 i^Z|72G@wox zg@%axG*iR{;&Pm$c!XFibAKLjNb-8GA8@Xhy&-!?8gb|RxszCbKF*+_7BgM;8812o zhYkb?q^K;grd4OkVToj>a t6kvUbWXxCjbceZban1hGfLp-ka;R`R>3uoc?NKDiMB^TE2GbEP z$e)2fefohc?o_mMGCy7JkMHH6doNP}M~PVOxVx)!QR_<$;@?>GfY0+VU^%9viYCch zE>5S>SIjuqNgE&$=NiNRy+9VFAUZ(nH@T~tQnGGiw{+p3O44xdBJG3QctU;Je^AHr zG6`=zG00LwE&X6nQ~$Y4-Q$DAL!yG;qk;c8a{?>!k}4g X3{ZWgb#B3p0@31GCj}gFZd;gW0lb;5VL~QZ@rg-;2Zl zyHXY90&-xx^zy62jLHV3bzHrlwS5-R#ezi^l>#f=E~84j&3 ZL~z!#&OAs z8)r~lkmLZBw-d|TLts@pZIjAtuUPq#aA8P5rvNvM-Pe2s!gMt&j`f=1=k; QqYv{1Jg3l{IM;k-gR0=Bpfn8E}MC_}h= zXh2_(Vbxk`uF{J c)S zC#T!cbDs}qoMp;9dJscLpns9C%G%WITrUoR8sz#3Y++v<<8&AnZ0ZNPB8iY#5bih; zKRGGqr>(YBR!?FXA^ieJoQO7nuzz5c-2CT3)&iC7U~S;4OQr;tmHh{t4Zgvqc-;rn z9DT|poHtdXaYOCNNuq!;+AyEPtJ+UvbI**wH)fBC)P25l*WSUPxiJ5Y>~xnOgrT zSGI5BAE AghvC6>sMC8S?GRbm6i!$Uxc%tmWzO4&e&nu9+s+;u zta4ASHw2q m|$}S%F>c~+@o( zJ_(!p *r0W;+W^Y!*8U5Cu}x z1t!xDYi|2`O>9NRR2P*0-t}@fAR&9VP0hK@zLMh^6yeD53Ma0_6#x)xS8x4bdnhZP zIC*wqsO)Ga_xJjAFtB?0u|&-!Il NzWQuJLkknRtt7i|XfW6X^mA zWGva`Rzbe$df3$)xw#MI8P*G-hKY;dPEy16a3M~^)`b^APE#Raxpg+k(LXkaO=A0< zk=eEEWlPE-cNtCMe%S4Bm#$wAa0z?I@=1)LV&|(g!Nh61YrnytAMuPK4j8M}a6kBX z{zVXwQf`{Two);rKT}FRzB|1XN&Wg9jmo~zW`BZIay}xBN>_5bHD@Nqx&HBMnky#R z6`P&>0@$5-Fo+j4>I!KU!KhgFgs9ZKIIeT6Q8!68nn*9JI0Io(ORVA*Z9l->%DWcT z@3!mi+gT<1<5>=NHWWV)NyO1yGS;{nrNsRAU;Vq@2oR6c3cV?g{Tv8-mP@mMp9{hR zPuzbN;QzDHiH9$YanT-*bWB6NK}Hi%n0?faa_RJ3S#27K9UQ?+GC7U?JaEsQ^*PJ& 
zQ}y)*Is?4yW$SIep~G>T?Po~Y-|QmG7#j)Bdwu)j=R^1Y#XmVj=b6hp>^bfBZ|5Y4 zaB@3QjSC>PvPJ2eV2e(79u8~>%-oiYDgJ5e;K1s=?bv!^SjDZvfi#^S-yG}dW~Ris zNEU3EKf#Z2oQALTxNFq+sAygECgo5!XMkaL2|KFb43^VZM7+Ye4B3k;2@jhahqiN< zC;f_w)~!TdlX4va8~{f+i%wvBgIh^Xhea`)Y#iv>HgKl`Hb0Xl|DzBvTET1Z%{f*C zXGZTMlQs4 @o$7(r8Wxoor#!026I%Dx zTrl}G_tezXq#|NUfP2IZ9A~du2;7 yc#?yZ zbSNJ>9)y3U49P>cRDpcoUo02PaOF-i=+%)LG4-XbbCLYmVj~T_2752kX-%so 3SI3n8Lq489C-`pDKuOxqWgtL{89&BhJcoaZ zEiukzMr$HlMU!fz%-(`Y;pTbZ1-HtoN%kSWp4nBQ#?*!}+{~@~&QH?%bzcahw Q}!&c7TelBX~9{Mb^QJ5?7FlNBR^x)0ItZg>T zqx%}K4;@OLsSo}gYJY#nT^(=Hm`dQgbfO+ETBa%=V*+gmQa1g5TlZeN8|DihIo8|f zIDrTE=ChgrEl#xIOw_Wk7fJXqt$Q?&nejr7vt#*ABMUv!^yts$0WXa1ABD5=BxY6O zN?Z9 BCZ5uEDD0l8_J?bn&) *66j`MiR&m?)&$3|9x8r 8nI)Ke zVSTs2q)+(7+5VfAdsBqE9)yq#r^Ny5imh>Pj`0M4VW0ED4kFYpW3emU3Uc@KrvfP} zF0-RiRk2=?7L`or_$EJo{&el{i)lY``~>LBkxr}HgW;9Hg!|vMoZqb|^x{{y*Pm`) zJ@*4QNVeMiJ(=^^uWnxq`~i9G!FcxYT~X7rMLWLN@3;(Qr6c*dlVqFrWTJ^0d_Q8o z4r}hLHgQaw1{Gc_d2crsmtCi 2F7JB#mx+u#330d# zPlG_7^*!{hB)M2>$cTgM-uO%dy54cZS}DeQ^GXf|O;G2>lJ$<|E^e0icQ3-Ggk$)F z+;N#nhi`+?55>@#h-p!v19kkU8$yFblVziI6MP|l(RSpNuW8(#L|IM`tgLf!JV9&w zbir)yWx&(zD-3g=eS1O1v%dU}&)T43xBUR~5FF1&IbhUsLK}B0`1?&jUJjBIAyv_F zcKtMYqUV^+(lDPa|8}s$b`NBt3@(saTAb1cQacygX**H8;NstX$Gz}c0H&;V7!EBY zrkTg7O43GyYG5wv?yuax>EWZi79M|V*v^{6N@5?zImlS8;vNhYT{J hsxToTfh&(w3E*h`_)JB=$QhL5_dgz)|nf)!*3nl zue}goS^a5ZTBM>y)kMr}XQD|338rX1%M J8a{WV*rl=SZ@lakU}?Tw(3Y^6~bB zT{7hGnmGDX`QG$XA$&2)d><{4p|_N*e{>WkiThpsg(e!3WBrBfIQ+3-3@NnJxAIu5 zCEdU3<~V9$tinCGB4ANP#ZkWOA{~B{k3kMinXM`(x+y?V{K9nHQW0?ZTr_5n9@){rY2O;9>gvm_yXaFpXp&78Es%Xl!=ggwEaiHS zWUHE KmvJg>!xe7zLj_Gb^kx^P=Xn&t@ZbVS}1;6Y7v z@0>US*C5u@KD?X#6MC`Pixg~g>PkX0x3kR_DT!Af{9t5X83SA(;i)uX9QSamd_W&u zrv5e7?%$3x>C3X*;O?uJ6)=r0=@8g&DQy_XQXbyjDC>FGwZ}Yhldtw7F>ZQ2GUfh4 zn?{8JiTW5cN xNG9egA%f1X`}OWVDHgvtr|nEEGkq6F&Emk1OMlg-E}K1+}|;K3(a_I zE(_b*f4MS26RqrAoKate`BG~+4v|Y!KrQ68)r9Dw$sw9Bt?3i(ZHR6|mVVPbH_K_Y zKPKWl&NnD)C9{^e+kW1%v;@?0JXFKn&v9+K*Rw(S<3{gL9p4`?a_5B-l|sZl7OB9y 
znzlW}mW$jfO7t4nlO~ju)e4v`zs5K(QPsot|BQmbNxBS)hT4G-`BUsZ*xOi@nkXDH z_k4MV^h3n$RrGF$T&s`Ad0_x5ji(EtXd&24cPTRc1)<6Dy=^%RX@gGXVo%?w->44S zsx@FftAc;JIXpT~SflndO9mOK)_R!g)5MTmrM@!OoZXFW(tF!uV&WSsduwy(MN&jQ zJxn?Hv$FuSkn@pu_LIP`tW0;clrROZDE}(Bi$+?%ix~8UpMu=Ba%F%%qE2Mb!L(>( zvniOE-{@&yneOQMFl{$|%IIxmSi}%08KX+ QIO;&1doO#Vw6;i2nDaC} aEDuNE^EO)|_2*}?gE1|L=M?!W#60Xl=>TKaUSDBRF_)?5t?h3SZFPILS$H!oL( zERngg)H gxx{uql0#Nh?qRl;H2%gT}$TF{XS=i-DBd*c@b$Z zEAu`BbN-PJ$P+nzF=aIcecB=XC1RbAty<6b!;sBzVI?_uE1#qgb0Gc^=;xQwuNN-t z?{4_6!K}4A8VCa)&$M9#>x9=umEt`sDNueo=yAihg3$8!8h2V3|2#F30H1c%i> INU+tJ)f0vji+AeUu`|%OTL^PTvsOqD9qyZ+zJmH)-+F52wWKRTDmK4xMkN zGhkwW+kA#l`q_iivBZhXqodCfe6K;D1=~3 2+eEc<=IuoaQrWhRn;f z(w0MADp;oXKaP20=p!Nk-J-Oee4n!s8d;whw#Q^*<)O&SM)#G^_$uS)Ti-X4gzZL( zTsDc2$bV>EQ`lN>we9VdLz^jN-krt0m;SD-YSPeAi$_rAT?Vhmz|?cNa9)Zy`%& zvf_uj&7Tb4bs5N6 zWZbJ6{U-ie FPN?qbf=Baf`2;pU&Wt-%g`W#Jn i7 zIJH5{z96i3vtdGkg-}GXzX Z z)bqchz8=&)FEj?DsKlV1m7wPWSImG?R52;H^7W5Pxhg!j=hO`Mc`GgVa#qsU+(u7U zOf8L<>!4YMcV-iYkMbF7J8$&xew!zLD|pe((Hyva=T=uOrtiZYhM7WP$X0T6_daUm z5~dteng6YYNM*scFe5{2;g*ABP+NiP?q&xl`nzGZbdc*)ZC?4yE&HR?{H6|rsda$N z>jL7FI$?66HG=yf Z)p8QKQlJi;41clp&4bH|20uHh-06 z>ew?4nRBM `93rFQDwyxt^iUK9QlmBT?4 zigKG;Si)&I>v{uj#jUl0~@90$;s~FGuf~h}(8#>tIx^Te!WAyE 5VWPoO1TIpDYtxBg)t
4!yt8ZJ1$UOLVpcZ?r=2GxReQCw4x0loQlVY5!U1-kfpwOSA|!(n =dv?A~Wn&%fcvxAPS%Cg>%GDBmB+qCY0Oe@B=x76ijLr_W(g)a(@ zsm9i<30=nNwZRQR6D<( }%wW~p3O+GW4MVcfIcYY7=$ z1ovW6NV@A!Ep)ojb#uUQTxDiI{*h+ObM!tz&2dunRl8x! dTdsoz83FDQ7mK(3QPMFcuhI=|^->%1v#H*ZSgw)L1* zyF5kKvGRos&%iAmTkk9ufHZY~njuhj8=9}rQN-|n5(Bb$YI2OvV&PC_spF?UP*&6d z)Y7<-#ZMb9mz9~RgETI)S?Etitg$g!oa#$^Lf!f>ez_`hnr}xZRp@^Uwv)Gf0q)35 zq^bOJ{Ug4UHcf(`_Fj_4NC|Zg^I6N+YGaB_Ci2>ZWr#JXf(0L=KR0*92T|0USm jxO8!VKvyIFg>f(xb+>v%OtymIZs2)^&P^t z`g!JfU{lr`xBZ g$<<_z0$MEFU_FWXTm9F1^TkAH19mZwy6?c zQB(bKZ;zH|Ab+W4v`qw~N%;{zy7KLHmvKI`8wC2~8K`5uvSAaEYL|!v*P5#)s`pWz zAcdx+w8pj$-z~=+B_J`i26UMTF^%GC6=ZA`G >c>Sgfy97*4a06U|`TBY-!^?AVkJ7 z9y<#hZD8Z~&IvA$Z7roTCd>9(?wS={4jCJ(bVho%iNvZ1X^!?`CG7o80a`<8jq*`? zvUwDO>JC?|p@wOWzRhAUTdD1$^2Q+0KN6MMzNv>u*YP#F`h~`@9&`Zz#yEN&ukN_r+G6ytEi1 PShs6 zi|ih8o0Cef2fj<3o?YpL;E|xTRP1RPN~L&$mtC^c_~P3{y{|8m0t2Z}`Z IJO>zh z{JG+-T8Xi1va62+PDXiEk%^58}iKW&M7N&h5$ zj~M{NKM^A@vi${bzJDa`ir0(mDMJjKmrN (>54 ztBU!-0*$@3Pb*3d`c}I6?Wbo_UdE7h9kv~Q_1IsXJ+1{i&S4;1@baG{EbhIG%L9hB zLbc_0>iv#iG%rS%zToUR+Hk3_%C?iK=e2l6Rw*J4D`(yxF=z;^we0fkoAIpz0l(VY zm$g^t2~X1sMmWr87D0)_@S)EhgfW-!L7?W#AP!SB!jtJr<}gw^>Oyigxk?Q|1sq$8 z$ONsjaWv@|P=H9&vj!gzw60RNL~(P;nbic+OO5*txxSXM{PDK{ELtcO@~p}h33>%t zjYXh5fxWXp$9R*fkWA78dp|Zg>_>sg?l+q7J@c|R75=14W %Ta1!v5XnWI##rKwqoz#A#m&W``im0hWGem~QefacKsQvlPwaf@pmLTy^hMI7 zb%stAI(0<-ew=l;eCwZ%f5G~{QRA<7+Cz^k8#Q?s+5M9J L6 zkr0kiXA60Ra8Pnqb`^RD6hH*jw%r{oO*%eVT2X6TK-6ndgP4FRAEmK-YZ1N>cscAW zC2xwsL&QGl;1OX wNNi$#$6)r