Skip to content

Commit e98e838

Browse files
committed
Fix on cloud execution
1 parent 35e73cd commit e98e838

File tree

3 files changed

+18
-11
lines changed

3 files changed

+18
-11
lines changed

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,5 @@ httpx==0.27.0
99
sentry-sdk==2.8.0
1010
setuptools==75.4.0
1111
git+https://github.com/huridocs/queue-processor@fce76dc0c39a8d929128304eb77047985ccd344a
12-
git+https://github.com/huridocs/ml-cloud-connector@6f8dd2aaa890b9cebbab3254dddec65b6bbce95f
12+
git+https://github.com/huridocs/ml-cloud-connector@f6ae4b5244b27131e28e2ea1b501ab1bba68fdc7
1313

src/tests/test_app.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import unittest
22
from pathlib import Path
3+
from time import time
34
from unittest import TestCase
45
from fastapi.testclient import TestClient
56

@@ -15,8 +16,10 @@ def test_info(self):
1516

1617
self.assertEqual(200, response.status_code)
1718

18-
# @unittest.skip("This test requires a running cloud service")
19+
@unittest.skip("This test requires a running cloud service")
1920
def test_cloud(self):
21+
start = time()
22+
2023
test_file_path = Path(APP_PATH, "tests", "test_files", "test.pdf")
2124
with open(test_file_path, "rb") as stream:
2225
file_content = stream.read()
@@ -28,3 +31,7 @@ def test_cloud(self):
2831

2932
response = client.get("/get_xml/default.xml")
3033
self.assertEqual(200, response.status_code)
34+
35+
print("start")
36+
print("time", round(time() - start, 2), "s")
37+
print("time", round(time() - start, 2), "s")

src/use_cases/extract_segments_use_case.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from ml_cloud_connector.domain.RestCall import RestCall
66
from ml_cloud_connector.domain.ServerParameters import ServerParameters
77
from ml_cloud_connector.domain.ServerType import ServerType
8-
from ml_cloud_connector.ports.CloudProviderRepository import CloudProviderRepository
98
from ml_cloud_connector.use_cases.ExecuteOnCloudUseCase import ExecuteOnCloudUseCase
109

1110
from configuration import (
@@ -25,6 +24,11 @@
2524

2625
RETRIES = 3
2726

27+
if not USE_LOCAL_SEGMENTATION:
28+
SERVER_PARAMETERS = ServerParameters(namespace="google_v2", server_type=ServerType.DOCUMENT_LAYOUT_ANALYSIS)
29+
CLOUD_PROVIDER = GoogleV2Repository(server_parameters=SERVER_PARAMETERS, service_logger=service_logger)
30+
EXECUTE_ON_CLOUD = ExecuteOnCloudUseCase(cloud_provider=CLOUD_PROVIDER, service_logger=service_logger)
31+
2832

2933
def get_xml_name(task: Task) -> str:
3034
xml_file_name = f"{task.tenant}__{task.params.filename.lower().replace('.pdf', '.xml')}"
@@ -72,10 +76,6 @@ def extract_segments(task: Task, xml_file_name: str = "") -> ExtractionData:
7276

7377

7478
def extract_segments_cloud(pdf_file: PdfFile, task: Task, xml_file_name: str = "") -> (bool, ExtractionData):
75-
server_parameters = ServerParameters(namespace="google_v2", server_type=ServerType.DOCUMENT_LAYOUT_ANALYSIS)
76-
cloud_provider = GoogleV2Repository(server_parameters=server_parameters, service_logger=service_logger)
77-
execute_on_cloud_use_case = ExecuteOnCloudUseCase(cloud_provider=cloud_provider, service_logger=service_logger)
78-
7979
with open(pdf_file.get_path(task.params.filename), "rb") as stream:
8080
file_content = stream.read()
8181

@@ -88,11 +88,11 @@ def extract_segments_cloud(pdf_file: PdfFile, task: Task, xml_file_name: str = "
8888
files=files,
8989
data={"fast": "False"},
9090
)
91-
response, success, error = execute_on_cloud_use_case.execute(rest_call)
91+
response, success, error = EXECUTE_ON_CLOUD.execute(rest_call)
9292
if not success:
9393
return False, None
9494

95-
if not save_cloud_xml_file(execute_on_cloud_use_case, xml_file_name):
95+
if not save_cloud_xml_file(xml_file_name):
9696
return False, None
9797

9898
segments: list[SegmentBox] = [SegmentBox(**segment_box) for segment_box in response.json()]
@@ -106,14 +106,14 @@ def extract_segments_cloud(pdf_file: PdfFile, task: Task, xml_file_name: str = "
106106
)
107107

108108

109-
def save_cloud_xml_file(execute_on_cloud_use_case: ExecuteOnCloudUseCase, xml_file_name: str) -> bool:
109+
def save_cloud_xml_file(xml_file_name: str) -> bool:
110110
try:
111111
rest_call = RestCall(
112112
port=DOCUMENT_LAYOUT_ANALYSIS_PORT,
113113
endpoint=["get_xml", xml_file_name],
114114
method="GET",
115115
)
116-
response, success, error = execute_on_cloud_use_case.execute(rest_call)
116+
response, success, error = EXECUTE_ON_CLOUD.execute(rest_call)
117117

118118
if not success:
119119
return False

0 commit comments

Comments
 (0)