diff --git a/docling/document_converter.py b/docling/document_converter.py index 8a71a570..e637f18c 100644 --- a/docling/document_converter.py +++ b/docling/document_converter.py @@ -88,7 +88,7 @@ def convert(self, input: DocumentConversionInput) -> Iterable[ConvertedDocument] # Note: Pdfium backend is not thread-safe, thread pool usage was disabled. yield from map(self.process_document, input_batch) - def convert_single(self, source: Path | AnyHttpUrl | str) -> Document: + def convert_single(self, source: Path | AnyHttpUrl | str) -> ConvertedDocument: """Convert a single document. Args: @@ -133,11 +133,10 @@ def convert_single(self, source: Path | AnyHttpUrl | str) -> Document: converted_doc: ConvertedDocument = next(converted_docs_iter) if converted_doc.status not in { ConversionStatus.SUCCESS, - ConversionStatus.SUCCESS_WITH_ERRORS, + ConversionStatus.PARTIAL_SUCCESS, }: raise RuntimeError(f"Conversion failed with status: {converted_doc.status}") - doc = converted_doc.to_ds_document() - return doc + return converted_doc def process_document(self, in_doc: InputDocument) -> ConvertedDocument: start_doc_time = time.time() diff --git a/examples/minimal.py b/examples/minimal.py index 0ea45a6e..837db718 100644 --- a/examples/minimal.py +++ b/examples/minimal.py @@ -1,8 +1,6 @@ from docling.document_converter import DocumentConverter -source = "https://arxiv.org/pdf/2206.01062" # PDF path or URL +source = "https://arxiv.org/pdf/2408.09869" # PDF path or URL converter = DocumentConverter() doc = converter.convert_single(source) -print( - doc.export_to_markdown() -) # output: "## DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis [...]" +print(doc.render_as_markdown()) # output: "## Docling Technical Report [...]"