2828import zipfile
2929
3030from . import extra
31-
31+ import importlib.util
3232
3333# Set up g_out_log and g_out_message from environment variables.
3434#
@@ -333,6 +333,37 @@ def __init__(self):
333333
334334_globals = _Globals()
335335
# Hook for layout analysis: None until a layout function is stored here.
# get_layout() calls it with the page object and keeps whatever it returns.
_get_layout: typing.Optional[typing.Callable] = None

# Global switch ensuring that the recommendation message is shown at most once.
# Every function that assigns to it must declare it with a "global" statement,
# otherwise the assignment would only bind a function-local name.
_recommend_layout = True
340+
341+
def no_recommend_layout() -> None:
    """Switch off the layout recommendation message.

    For users who never want to see the hint: clears the module-level
    `_recommend_layout` flag, which `_warn_layout_once()` requires to be
    true before it prints anything.
    """
    # "global" is required; a plain assignment would only bind a local name.
    global _recommend_layout
    _recommend_layout = False
346+
347+
def _warn_layout_once() -> None:
    """Print the layout package recommendation, at most once.

    The hint is printed only if all of the following hold:

    * the module-level `_recommend_layout` flag is still true,
    * no layout function is stored in `_get_layout`,
    * the environment variable PYMUPDF_SUGGEST_LAYOUT_ANALYZER is not "0",
    * the module "pymupdf.layout" cannot be located in this Python.

    After printing, `_recommend_layout` is set to False so the message is
    never shown again. If any condition fails, nothing is printed and the
    flag is left unchanged.
    """
    global _recommend_layout
    msg = """Consider using the pymupdf_layout package for a greatly improved page layout analysis."""

    if not _recommend_layout:
        # message already shown, or switched off via no_recommend_layout()
        return
    if _get_layout is not None:
        # a layout function is stored here
        return
    if os.getenv("PYMUPDF_SUGGEST_LAYOUT_ANALYZER") == "0":
        # client globally disabled the recommendation
        return

    # find_spec() imports the parent package of a dotted name and may raise
    # instead of returning None. A purely advisory hint must never break the
    # caller, so both documented failure modes are handled here.
    try:
        layout_available = importlib.util.find_spec("pymupdf.layout") is not None
    except ImportError:
        # parent package could not be imported: layout cannot be present
        layout_available = False
    except ValueError:
        # module is already in sys.modules but carries no spec: it is loaded
        layout_available = True
    if layout_available:
        return

    print(msg)
    _recommend_layout = False  # never show the message again
366+
336367
337368# Optionally use MuPDF via cppyy bindings; experimental and not tested recently
338369# as of 2023-01-20 11:51:40
@@ -9952,7 +9983,7 @@ def _get_resource_properties(self):
99529983 return rc
99539984
99549985 def _get_textpage(self, clip=None, flags=0, matrix=None):
9955- if g_use_extra:
9986+ if 1 or g_use_extra:
99569987 ll_tpage = extra.page_get_textpage(self.this, clip, flags, matrix)
99579988 tpage = mupdf.FzStextPage(ll_tpage)
99589989 return tpage
@@ -10781,6 +10812,20 @@ def clip_to_rect(self, rect):
1078110812 pclip = JM_rect_from_py(clip)
1078210813 mupdf.pdf_clip_page(pdfpage, pclip)
1078310814
def get_layout(self) -> None:
    """Try to obtain layout information for this page and store it.

    Does nothing if `self.layout_information` is already set, or if no
    layout function is stored in the module-level `_get_layout`.
    Otherwise calls `_get_layout(self)` and stores its return value in
    `self.layout_information`. Always returns None.
    """
    if self.layout_information is not None:
        # layout information already present - keep it
        return

    if not _get_layout:
        # no layout function available
        return

    self.layout_information = _get_layout(self)
10828+
1078410829 @property
1078510830 def artbox(self):
1078610831 """The ArtBox"""
@@ -11432,7 +11477,7 @@ def get_cdrawings(self, extended=None, callback=None, method=None):
1143211477 assert isinstance(page, mupdf.FzPage), f'{self.this=}'
1143311478 clips = True if extended else False
1143411479 prect = mupdf.fz_bound_page(page)
11435- if g_use_extra:
11480+ if 1 or g_use_extra:
1143611481 rc = extra.get_cdrawings(page, extended, callback, method)
1143711482 else:
1143811483 rc = list()
@@ -12157,7 +12202,7 @@ def get_texttrace(self):
1215712202 self.set_rotation(0)
1215812203 page = self.this
1215912204 rc = []
12160- if g_use_extra:
12205+ if 1 or g_use_extra:
1216112206 dev = extra.JM_new_texttrace_device(rc)
1216212207 else:
1216312208 dev = JM_new_texttrace_device(rc)
@@ -13206,6 +13251,9 @@ def xref(self):
1320613251
1320713252 rect = property(bound, doc="page rectangle")
1320813253
13254+ # any result of layout analysis is stored here
13255+ layout_information = None
13256+
1320913257
1321013258class Pixmap:
1321113259
@@ -16391,7 +16439,7 @@ def _textpage_dict(self, raw=False):
1639116439
1639216440 def extractBLOCKS(self):
1639316441 """Return a list with text block information."""
16394- if g_use_extra:
16442+ if 1 or g_use_extra:
1639516443 return extra.extractBLOCKS(self.this)
1639616444 block_n = -1
1639716445 this_tpage = self.this
@@ -16587,7 +16635,7 @@ def extractTextbox(self, rect):
1658716635
1658816636 def extractWORDS(self, delimiters=None):
1658916637 """Return a list with text word information."""
16590- if g_use_extra:
16638+ if 1 or g_use_extra:
1659116639 return extra.extractWORDS(self.this, delimiters)
1659216640 buflen = 0
1659316641 last_char_rtl = 0
@@ -18969,7 +19017,7 @@ def JM_color_FromSequence(color):
1896919017
1897019018
1897119019def JM_color_count( pm, clip):
18972- if g_use_extra:
19020+ if 1 or g_use_extra:
1897319021 return extra.ll_JM_color_count(pm.m_internal, clip)
1897419022
1897519023 rc = dict()
@@ -20469,7 +20517,7 @@ def JM_make_annot_DA(annot, ncol, col, fontname, fontsize):
2046920517
2047020518
2047120519def JM_make_spanlist(line_dict, line, raw, buff, tp_rect):
20472- if g_use_extra:
20520+ if 1 or g_use_extra:
2047320521 return extra.JM_make_spanlist(line_dict, line, raw, buff, tp_rect)
2047420522 char_list = None
2047520523 span_list = []
@@ -20682,7 +20730,7 @@ def JM_make_image_block(block, block_dict):
2068220730
2068320731
2068420732def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
20685- if g_use_extra:
20733+ if 1 or g_use_extra:
2068620734 return extra.JM_make_text_block(block.m_internal, block_dict, raw, buff.m_internal, tp_rect.m_internal)
2068720735 line_list = []
2068820736 block_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
@@ -20705,7 +20753,7 @@ def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
2070520753
2070620754
2070720755def JM_make_textpage_dict(tp, page_dict, raw):
20708- if g_use_extra:
20756+ if 1 or g_use_extra:
2070920757 return extra.JM_make_textpage_dict(tp.m_internal, page_dict, raw)
2071020758 text_buffer = mupdf.fz_new_buffer(128)
2071120759 block_list = []
@@ -21356,7 +21404,7 @@ def JM_rotate_page_matrix(page):
2135621404
2135721405
2135821406def JM_search_stext_page(page, needle):
21359- if g_use_extra:
21407+ if 1 or g_use_extra:
2136021408 return extra.JM_search_stext_page(page.m_internal, needle)
2136121409
2136221410 rect = mupdf.FzRect(page.m_internal.mediabox)
0 commit comments