From 311640fb9d51d657375d240a9a187c41e7762e9c Mon Sep 17 00:00:00 2001
From: Peter Staar <taa@zurich.ibm.com>
Date: Fri, 8 Nov 2024 05:41:09 +0100
Subject: [PATCH] reformatted the code

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---
 docling/backend/html_backend.py | 105 ++++++++++++++++++++++++--------
 docling/cli/main.py             |   4 +-
 2 files changed, 83 insertions(+), 26 deletions(-)
diff --git a/docling/backend/html_backend.py b/docling/backend/html_backend.py
index b14e45ff..3680e057 100644
--- a/docling/backend/html_backend.py
+++ b/docling/backend/html_backend.py
@@ -11,6 +11,8 @@
     DoclingDocument,
     DocumentOrigin,
     GroupLabel,
+    ImageRef,
+    Size,
     TableCell,
     TableData,
 )
@@ -117,16 +119,6 @@ def walk(self, element, doc):
             else:
                 _log.debug(f"ignoring element of type {type(element)}")
 
-            """
-            elif isinstance(element, Tag):
-                try:
-                    self.analyse_element(element, 0, doc)
-                except Exception as exc:
-                    _log.info(f" -> error treating elem: {exc}")
-                    raise exc
-            """
-
-                
         except Exception as exc:
             _log.debug(f"error walking element: {type(element)}")
             pass
@@ -472,17 +464,63 @@ def extract_table_cell_text(self, cell):
             )
             return cell.text
 
-    def handle_figure(self, element, idx, doc):
-        """Handles image tags (img)."""
+    def _get_imageref(self, element):
 
-        # Extract the image URI from the <img> tag
-        # image_uri = root.xpath('//figure//img/@src')[0]
+        fig_ref = None
 
-        contains_captions = element.find(["figcaption"])
-        if contains_captions is None:
-            doc.add_picture(parent=self.parents[self.level], caption=None)
+        img = element.find(["img"])
+        _log.info(img)
 
-        else:
+        if img is not None and img.has_attr("src"):
+            fig_uri = img["src"]
+            _log.info(fig_uri)
+
+            dpi = 128
+            try:
+                dpi = int(img["dpi"])
+            except:
+                _log.debug("could not identify `dpi` of image")
+
+            width = 128
+            try:
+                width = int(img["width"])
+            except:
+                _log.debug("could not identify `width` of image")
+
+            height = 128
+            try:
+                height = int(img["height"])
+            except:
+                _log.debug("could not identify `height` of image")
+
+            if fig_uri.endswith(".jpg"):
+                fig_ref = ImageRef(
+                    mimetype="image/jpg", dpi=dpi, size=Size(width, height), uri=fig_uri
+                )
+
+            elif fig_uri.endswith(".jpeg"):
+                fig_ref = ImageRef(
+                    mimetype="image/jpg", dpi=dpi, size=Size(width, height), uri=fig_uri
+                )
+
+            elif fig_uri.endswith(".png"):
+                fig_ref = ImageRef(
+                    mimetype="image/png", dpi=dpi, size=Size(width, height), uri=fig_uri
+                )
+
+            elif fig_uri.endswith(".svg"):
+                fig_ref = ImageRef(
+                    mimetype="image/svg", dpi=dpi, size=Size(width, height), uri=fig_uri
+                )
+
+        return fig_ref
+
+    def _get_figcaption(self, element, doc):
+
+        fig_caption = None
+
+        contains_captions = element.find(["figcaption"])
+        if contains_captions is not None:
             texts = []
             for item in contains_captions:
                 texts.append(item.text)
@@ -490,15 +528,34 @@ def handle_figure(self, element, idx, doc):
             fig_caption = doc.add_text(
                 label=DocItemLabel.CAPTION, text=("".join(texts)).strip()
             )
-            doc.add_picture(
-                parent=self.parents[self.level],
-                caption=fig_caption,
-            )
+
+        return fig_caption
+
+    def handle_figure(self, element, idx, doc):
+        """Handles image tags (img)."""
+
+        fig_ref = self._get_imageref(element)
+        fig_caption = self._get_figcaption(element, doc)
+
+        _log.warn(fig_ref)
+
+        doc.add_picture(
+            parent=self.parents[self.level], image=fig_ref, caption=fig_caption
+        )
 
     def handle_image(self, element, idx, doc):
         """Handles image tags (img)."""
-        doc.add_picture(parent=self.parents[self.level], caption=None)
+
+        fig_ref = self._get_imageref(element)
+
+        doc.add_picture(parent=self.parents[self.level], image=fig_ref, caption=None)
 
     def handle_svg(self, element, idx, doc):
         """Handles svg tags."""
-        doc.add_picture(parent=self.parents[self.level], caption=None)
+
+        fig_ref = self._get_imageref(element)
+        fig_caption = self._get_figcaption(element, doc)
+
+        doc.add_picture(
+            parent=self.parents[self.level], image=fig_ref, caption=fig_caption
+        )
diff --git a/docling/cli/main.py b/docling/cli/main.py
index 0b6106ed..a1089a8a 100644
--- a/docling/cli/main.py
+++ b/docling/cli/main.py
@@ -120,8 +120,8 @@ def export_documents(
             if export_itxt:
                 fname = output_dir / f"{doc_filename}.itxt"
                 with fname.open("w") as fp:
-                    _log.info(f"writing Doc Tags output to {fname}")
-                    fp.write(conv_res.document._export_to_indented_text())                    
+                    _log.info(f"writing Indented Text output to {fname}")
+                    fp.write(conv_res.document._export_to_indented_text())
         else:
             _log.warning(f"Document {conv_res.input.file} failed to convert.")
             failure_count += 1