Skip to content

Commit

Permalink
removed print statements
Browse files Browse the repository at this point in the history
Signed-off-by: Matteo Omenetti <[email protected]>
  • Loading branch information
Matteo-Omenetti committed Jan 23, 2025
1 parent a59c03b commit 849aa75
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 15 deletions.
21 changes: 7 additions & 14 deletions docling/models/code_formula_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def is_processable(self, doc: DoclingDocument, element: NodeItem) -> bool:
)

def _extract_code_language(self, input_string: str) -> Tuple[str, Optional[str]]:
"""Extracts a programming language from the beginning of a (possibly multi-line) string.
"""Extracts a programming language from the beginning of a string.
This function checks if the input string starts with a pattern of the form
``<_some_language_>``. If it does, it extracts the language string and returns
Expand All @@ -162,12 +162,6 @@ def _extract_code_language(self, input_string: str) -> Tuple[str, Optional[str]]
- The second element is the extracted language if a match is found;
otherwise, `None`.
"""
# Explanation of the regex:
# ^<_([^>]+)_> : match "<_something_>" at the start, capturing "something" (Group 1)
# \s* : optional whitespace
# (.*) : capture everything after that in Group 2
#
# We also use re.DOTALL so that the (.*) part can include newlines.
pattern = r"^<_([^>]+)_>\s*(.*)"
match = re.match(pattern, input_string, flags=re.DOTALL)
if match:
Expand Down Expand Up @@ -209,18 +203,17 @@ def prepare_element(

element_prov = element.prov[0]

expansion_factor = 0.03 # Adjust the expansion percentage as needed
expansion_factor = 0.03
bbox = element_prov.bbox
width = bbox.r - bbox.l
height = bbox.t - bbox.b

# Create the expanded bounding box
expanded_bbox = BoundingBox(
l=bbox.l - width * expansion_factor, # Expand left
t=bbox.t + height * expansion_factor, # Expand top
r=bbox.r + width * expansion_factor, # Expand right
b=bbox.b - height * expansion_factor, # Expand bottom
coord_origin=bbox.coord_origin, # Preserve coordinate origin
l=bbox.l - width * expansion_factor,
t=bbox.t + height * expansion_factor,
r=bbox.r + width * expansion_factor,
b=bbox.b - height * expansion_factor,
coord_origin=bbox.coord_origin,
)

page_ix = element_prov.page_no - 1
Expand Down
1 change: 0 additions & 1 deletion tests/test_code_formula.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ def test_code_and_formula_conversion():
assert len(code_blocks) == 1

gt = 'public static void print() {\n System.out.println("Java Code");\n}'
print(gt)

predicted = code_blocks[0].text.strip()
assert predicted == gt, f"mismatch in text {predicted=}, {gt=}"
Expand Down

0 comments on commit 849aa75

Please sign in to comment.