make gradio optional and update README

Signed-off-by: Michele Dolfi <[email protected]>
DS4SD · Jan 24, 2025 · 774b879 · 774b879
1 parent aca82b8
commit 774b879
Show file tree

Hide file tree

Showing 6 changed files with 701 additions and 513 deletions.
diff --git a/Containerfile b/Containerfile
@@ -36,6 +36,8 @@ ENV LANG=en_US.UTF-8
 ENV LC_ALL=en_US.UTF-8
 ENV PYTHONIOENCODING=utf-8
 
+ENV WITH_UI=True
+
 COPY --chown=1001:0 pyproject.toml poetry.lock models_download.py README.md ./
 
 RUN pip install --no-cache-dir poetry && \

diff --git a/README.md b/README.md
@@ -39,6 +39,7 @@ The following variables are available:
 `TESSDATA_PREFIX`: Tesseract data location, example `/usr/share/tesseract/tessdata/`.
 `UVICORN_WORKERS`: Number of workers to use.
 `RELOAD`: If `True`, this will enable auto-reload when you modify files, useful for development.
+`WITH_UI`: If `True`, the Gradio UI will be available at `/ui`.
 
 ## Usage
 
@@ -48,20 +49,20 @@ The API provides two endpoints: one for urls, one for files. This is necessary t
 
 On top of the source of file (see below), both endpoints support the same parameters, which are the same as the Docling CLI.
 
-- `from_format` (Optional[Union[List[str], str]]): Input format(s) to convert from. Allowed values: `docx`, `pptx`, `html`, `image`, `pdf`, `asciidoc`, `md`. Defaults to all formats.
-- `to_format` (Optional[Union[List[str], str]]): Output format(s) to convert to. Allowed values: `md`, `json`, `html`, `text`, `doctags`. Defaults to `md`.
-- `do_ocr` (Optional[bool]): If enabled, the bitmap content will be processed using OCR. Defaults to `True`.
+- `from_formats` (List[str]): Input format(s) to convert from. Allowed values: `docx`, `pptx`, `html`, `image`, `pdf`, `asciidoc`, `md`. Defaults to all formats.
+- `to_formats` (List[str]): Output format(s) to convert to. Allowed values: `md`, `json`, `html`, `text`, `doctags`. Defaults to `md`.
+- `do_ocr` (bool): If enabled, the bitmap content will be processed using OCR. Defaults to `True`.
 - `image_export_mode`: Image export mode for the document (only in case of JSON, Markdown or HTML). Allowed values: embedded, placeholder, referenced. Optional, defaults to `embedded`.
-- `force_ocr` (Optional[bool]): If enabled, replace any existing text with OCR-generated text over the full content. Defaults to `False`.
-- `ocr_engine` (Optional[str]): OCR engine to use. Allowed values: `easyocr`, `tesseract_cli`, `tesseract`, `rapidocr`, `ocrmac`. Defaults to `easyocr`.
-- `ocr_lang` (Optional[Union[List[str], str]]): List of languages used by the OCR engine. Note that each OCR engine has different values for the language names. Defaults to empty.
-- `pdf_backend` (Optional[str]): PDF backend to use. Allowed values: `pypdfium2`, `dlparse_v1`, `dlparse_v2`. Defaults to `dlparse_v2`.
-- `table_mode` (Optional[str]): Table mode to use. Allowed values: `fast`, `accurate`. Defaults to `fast`.
-- `abort_on_error` (Optional[bool]): If enabled, abort on error. Defaults to false.
-- `return_as_file` (Optional[bool]): If enabled, return the output as a file. Defaults to false.
-- `do_table_structure` (Optional[bool]): If enabled, the table structure will be extracted. Defaults to true.
-- `include_images` (Optional[bool]): If enabled, images will be extracted from the document. Defaults to true.
-- `images_scale` (Optional[float]): Scale factor for images. Defaults to 2.0.
+- `force_ocr` (bool): If enabled, replace any existing text with OCR-generated text over the full content. Defaults to `False`.
+- `ocr_engine` (str): OCR engine to use. Allowed values: `easyocr`, `tesseract_cli`, `tesseract`, `rapidocr`, `ocrmac`. Defaults to `easyocr`.
+- `ocr_lang` (List[str]): List of languages used by the OCR engine. Note that each OCR engine has different values for the language names. Defaults to empty.
+- `pdf_backend` (str): PDF backend to use. Allowed values: `pypdfium2`, `dlparse_v1`, `dlparse_v2`. Defaults to `dlparse_v2`.
+- `table_mode` (str): Table mode to use. Allowed values: `fast`, `accurate`. Defaults to `fast`.
+- `abort_on_error` (bool): If enabled, abort on error. Defaults to false.
+- `return_as_file` (bool): If enabled, return the output as a file. Defaults to false.
+- `do_table_structure` (bool): If enabled, the table structure will be extracted. Defaults to true.
+- `include_images` (bool): If enabled, images will be extracted from the document. Defaults to true.
+- `images_scale` (float): Scale factor for images. Defaults to 2.0.
 
 ### URL endpoint
 
@@ -76,14 +77,14 @@ Payload example:
   "from_formats": ["docx", "pptx", "html", "image", "pdf", "asciidoc", "md", "xlsx"],
   "to_formats": ["md", "json", "html", "text", "doctags"],
   "image_export_mode": "placeholder",
-  "do_ocr": True,
-  "force_ocr": False,
+  "do_ocr": true,
+  "force_ocr": false,
   "ocr_engine": "easyocr",
   "ocr_lang": "en",
   "pdf_backend": "dlparse_v2",
   "table_mode": "fast",
-  "abort_on_error": False,
-  "return_as_file": False,
+  "abort_on_error": false,
+  "return_as_file": false,
   "input_sources": "https://arxiv.org/pdf/2206.01062"
 }
 ```

diff --git a/docling_serve/app.py b/docling_serve/app.py
@@ -6,7 +6,6 @@
 from pathlib import Path
 from typing import Annotated, List
 
-import gradio as gr
 from docling.datamodel.base_models import InputFormat
 from docling.document_converter import DocumentConverter
 from dotenv import load_dotenv
@@ -22,13 +21,18 @@
     converters,
     get_pdf_pipeline_opts,
 )
-from docling_serve.gradio_ui import ui as gradio_ui
 from docling_serve.helper_functions import FormDepends, _str_to_bool
 from docling_serve.response_preparation import process_results
 
 # Load local env vars if present
 load_dotenv()
 
+WITH_UI = _str_to_bool(os.getenv("WITH_UI", "False"))
+if WITH_UI:
+    import gradio as gr
+
+    from docling_serve.gradio_ui import ui as gradio_ui
+
 
 # Set up custom logging as we'll be intermixes with FastAPI/Uvicorn's logging
 class ColoredLogFormatter(logging.Formatter):
@@ -83,7 +87,8 @@ async def lifespan(app: FastAPI):
     yield
 
     converters.clear()
-    gradio_ui.close()
+    if WITH_UI:
+        gradio_ui.close()
 
 
 ##################################
@@ -108,11 +113,12 @@ async def lifespan(app: FastAPI):
 )
 
 # Mount the Gradio app
-tmp_output_dir = Path(tempfile.mkdtemp())
-gradio_ui.gradio_output_dir = tmp_output_dir
-app = gr.mount_gradio_app(
-    app, gradio_ui, path="/ui", allowed_paths=["./logo.png", tmp_output_dir]
-)
+if WITH_UI:
+    tmp_output_dir = Path(tempfile.mkdtemp())
+    gradio_ui.gradio_output_dir = tmp_output_dir
+    app = gr.mount_gradio_app(
+        app, gradio_ui, path="/ui", allowed_paths=["./logo.png", tmp_output_dir]
+    )
 
 
 #############################

diff --git a/docling_serve/docling_conversion.py b/docling_serve/docling_conversion.py
@@ -101,7 +101,6 @@ class ConvertDocumentsParameters(BaseModel):
                 "Optional, defaults to easyocr."
             ),
             examples=[OcrEngine.EASYOCR],
-            # pattern="easyocr|tesseract|rapidocr",
         ),
     ] = OcrEngine.EASYOCR
 
@@ -127,7 +126,6 @@ class ConvertDocumentsParameters(BaseModel):
                 f"Optional, defaults to {PdfBackend.DLPARSE_V2.value}."
             ),
             examples=[PdfBackend.DLPARSE_V2],
-            # pattern="pypdfium2|dlparse_v1|dlparse_v2",
         ),
     ] = PdfBackend.DLPARSE_V2
 
@@ -342,29 +340,11 @@ def convert_documents(
     conversion_request: ConvertDocumentsRequest,
 ):
 
-    # # Initialize some values if missing
-    # # (None, empty string, empty List, List of empty strings)
-    # if not conversion_request.from_formats or all(
-    #     not item for item in conversion_request.from_formats
-    # ):
-    #     conversion_request.from_formats = [e for e in InputFormat]
-
-    # if not conversion_request.to_formats or all(
-    #     not item for item in conversion_request.to_formats
-    # ):
-    #     conversion_request.to_formats = OutputFormat.MARKDOWN
-
     # Sanitize some parameters as they can be a string or a list
     # TODO: maybe it could be done with a Pydantic field validator
     conversion_request.input_sources = _to_list_of_strings(
         conversion_request.input_sources
     )
-    # conversion_request.from_formats = _to_list_of_strings(
-    #     conversion_request.from_formats
-    # )
-    # conversion_request.to_formats = _to_list_of_strings(conversion_request.to_formats)
-    # if conversion_request.ocr_lang is not None:
-    #     conversion_request.ocr_lang = _to_list_of_strings(conversion_request.ocr_lang)
 
     pdf_format_option, options_hash = get_pdf_pipeline_opts(conversion_request)