Skip to content

Commit

Permalink
Fix tests to have unique document_hashes per test
Browse files (browse the repository at this point in the history)
Signed-off-by: Christoph Auer <[email protected]>
  • Loading branch information
cau-git committed Sep 18, 2024
1 parent 1b4c80a commit d3eb6c5
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
run: poetry install --all-extras
- name: Testing
run: |
poetry run pytest -vvv -s tests
poetry run pytest -v tests
- name: Run examples
run: |
for file in examples/*.py; do
Expand Down
11 changes: 7 additions & 4 deletions tests/test_backend_docling_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_doc_path():
def test_text_cell_counts():
pdf_doc = Path("./tests/data/redp5695.pdf")

doc_backend = DoclingParseDocumentBackend(pdf_doc, "123456xyz")
doc_backend = DoclingParseDocumentBackend(pdf_doc, "123456xyz5")

for page_index in range(0, doc_backend.page_count()):
last_cell_count = None
Expand All @@ -36,7 +36,7 @@ def test_text_cell_counts():


def test_get_text_from_rect(test_doc_path):
doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz")
doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz4")
page_backend: DoclingParsePageBackend = doc_backend.load_page(0)

# Get the title text of the DocLayNet paper
Expand All @@ -46,19 +46,22 @@ def test_get_text_from_rect(test_doc_path):
ref = "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis"

assert textpiece.strip() == ref
doc_backend.unload()


def test_crop_page_image(test_doc_path):
    """Render a cropped region of the first page of the test document.

    This is a smoke test: it makes no assertion about the returned image and
    passes as long as loading the page and rendering the crop do not raise.

    Args:
        test_doc_path: Fixture value handed to ``DoclingParseDocumentBackend``
            as the document to open.
    """
    # Build exactly one backend. A second construction that is immediately
    # overwritten would never be unloaded.
    # NOTE(review): the second argument is presumably the document hash and is
    # kept distinct from the ones used by the other tests - confirm.
    doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz3")
    try:
        page_backend: DoclingParsePageBackend = doc_backend.load_page(0)

        # Crop out "Figure 1" from the DocLayNet paper
        im = page_backend.get_page_image(
            scale=2, cropbox=BoundingBox(l=317, t=246, r=574, b=527)
        )
        # Uncomment to inspect the crop visually while debugging:
        # im.show()
    finally:
        # Release the backend even if loading or rendering raised.
        doc_backend.unload()


def test_num_pages(test_doc_path):
    """Check that the backend reports the expected page count.

    Args:
        test_doc_path: Fixture value handed to ``DoclingParseDocumentBackend``
            as the document to open.
    """
    # Build exactly one backend. A second construction that is immediately
    # overwritten would never be unloaded.
    # NOTE(review): the second argument is presumably the document hash and is
    # kept distinct from the ones used by the other tests - confirm.
    doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz2")
    try:
        # The comparison has to be asserted; as a bare expression its result
        # is discarded and the test can never fail.
        assert doc_backend.page_count() == 9
    finally:
        # Release the backend even when the assertion fails.
        doc_backend.unload()

0 comments on commit d3eb6c5

Please sign in to comment.