Skip to content

Commit

Permalink
fix: align output formats (#49)
Browse files: browse the repository at this point in the history
Signed-off-by: Michele Dolfi <[email protected]>
  • Loading branch information
dolfim-ibm authored Aug 26, 2024
1 parent 053eae4 commit 8cc147b
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 8 deletions.
7 changes: 3 additions & 4 deletions docling/document_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def convert(self, input: DocumentConversionInput) -> Iterable[ConvertedDocument]
# Note: Pdfium backend is not thread-safe, thread pool usage was disabled.
yield from map(self.process_document, input_batch)

def convert_single(self, source: Path | AnyHttpUrl | str) -> Document:
def convert_single(self, source: Path | AnyHttpUrl | str) -> ConvertedDocument:
"""Convert a single document.
Args:
Expand Down Expand Up @@ -133,11 +133,10 @@ def convert_single(self, source: Path | AnyHttpUrl | str) -> Document:
converted_doc: ConvertedDocument = next(converted_docs_iter)
if converted_doc.status not in {
ConversionStatus.SUCCESS,
ConversionStatus.SUCCESS_WITH_ERRORS,
ConversionStatus.PARTIAL_SUCCESS,
}:
raise RuntimeError(f"Conversion failed with status: {converted_doc.status}")
doc = converted_doc.to_ds_document()
return doc
return converted_doc

def process_document(self, in_doc: InputDocument) -> ConvertedDocument:
start_doc_time = time.time()
Expand Down
6 changes: 2 additions & 4 deletions examples/minimal.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from docling.document_converter import DocumentConverter

source = "https://arxiv.org/pdf/2206.01062" # PDF path or URL
source = "https://arxiv.org/pdf/2408.09869" # PDF path or URL
converter = DocumentConverter()
doc = converter.convert_single(source)
print(
doc.export_to_markdown()
) # output: "## DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis [...]"
print(doc.render_as_markdown()) # output: "## Docling Technical Report [...]"

0 comments on commit 8cc147b

Please sign in to comment.