From adf635348365f82daa64e3f879076a7baf71edc0 Mon Sep 17 00:00:00 2001
From: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
Date: Tue, 28 Jan 2025 19:03:54 +0100
Subject: [PATCH] fix: use file extension if filetype fails with PDF (#827)

Filetype library may not identify some files as PDF. Leverage the file extension as a simple solution.

Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
---
 docling/datamodel/document.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py
index a2a93aa3..e37541b7 100644
--- a/docling/datamodel/document.py
+++ b/docling/datamodel/document.py
@@ -352,6 +352,8 @@ def _mime_from_extension(ext):
             mime = FormatToMimeType[InputFormat.MD][0]
         elif ext in FormatToExtensions[InputFormat.JSON_DOCLING]:
             mime = FormatToMimeType[InputFormat.JSON_DOCLING][0]
+        elif ext in FormatToExtensions[InputFormat.PDF]:
+            mime = FormatToMimeType[InputFormat.PDF][0]
         return mime
 
     @staticmethod