Skip to content

Commit

Permalink
feat: added support for exporting DocItem to an image when page image is available (#379)
Browse files (browse the repository at this point in the history)

* Updated minimum docling-core version to 2.4.0

Signed-off-by: Shubham Gupta <[email protected]>

* Deprecated the generate_table_images option

Signed-off-by: Shubham Gupta <[email protected]>

* Updated examples to use get_image instead of element.image

Signed-off-by: Shubham Gupta <[email protected]>

---------

Signed-off-by: Shubham Gupta <[email protected]>
  • Loading branch information
sh-gupta authored Nov 19, 2024
1 parent 911c3bd commit 3f91e7d
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 8 deletions.
9 changes: 8 additions & 1 deletion docling/datamodel/pipeline_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,11 @@ class PdfPipelineOptions(PipelineOptions):
images_scale: float = 1.0
generate_page_images: bool = False
generate_picture_images: bool = False
- generate_table_images: bool = False
+ generate_table_images: bool = Field(
+ default=False,
+ deprecated=(
+ "Field `generate_table_images` is deprecated. "
+ "To obtain table images, set `PdfPipelineOptions.generate_page_images = True` "
+ "before conversion and then use the `TableItem.get_image` function."
+ ),
+ )
2 changes: 1 addition & 1 deletion docs/examples/develop_picture_enrichment.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def __call__(
assert isinstance(element, PictureItem)

# uncomment this to interactively visualize the image
- # element.image.pil_image.show()
+ # element.get_image(doc).show()

element.annotations.append(
PictureClassificationData(
Expand Down
5 changes: 2 additions & 3 deletions docs/examples/export_figures.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ def main():
pipeline_options = PdfPipelineOptions()
pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
pipeline_options.generate_page_images = True
- pipeline_options.generate_table_images = True
pipeline_options.generate_picture_images = True

doc_converter = DocumentConverter(
Expand Down
Expand Up @@ -61,15 +60,15 @@ def main():
output_dir / f"{doc_filename}-table-{table_counter}.png"
)
with element_image_filename.open("wb") as fp:
- element.image.pil_image.save(fp, "PNG")
+ element.get_image(conv_res.document).save(fp, "PNG")

if isinstance(element, PictureItem):
picture_counter += 1
element_image_filename = (
output_dir / f"{doc_filename}-picture-{picture_counter}.png"
)
with element_image_filename.open("wb") as fp:
- element.image.pil_image.save(fp, "PNG")
+ element.get_image(conv_res.document).save(fp, "PNG")

# Save markdown with embedded pictures
content_md = conv_res.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED)
Expand Down
21 changes: 19 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ packages = [{include = "docling"}]
######################
python = "^3.10"
pydantic = "^2.0.0"
- docling-core = "^2.3.0"
+ docling-core = "^2.4.0"
docling-ibm-models = "^2.0.3"
deepsearch-glm = "^0.26.1"
filetype = "^1.2.0"
Expand Down

0 comments on commit 3f91e7d

Please sign in to comment.