Skip to content

Commit 35e73cd

Browse files
committed
Use ml cloud connector for getting the xml
1 parent 16560ab commit 35e73cd

File tree

3 files changed: 16 additions (+16) and 8 deletions (-8)

3 files changed: 16 additions (+16) and 8 deletions (-8)

requirements.txt

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,12 @@ fastapi==0.111.1
22
python-multipart==0.0.9
33
uvicorn==0.30.3
44
gunicorn==22.0.0
5-
requests==2.32.3
65
graypy==2.1.0
76
PyYAML==6.0.1
87
pymongo==4.8.0
98
httpx==0.27.0
109
sentry-sdk==2.8.0
1110
setuptools==75.4.0
1211
git+https://github.com/huridocs/queue-processor@fce76dc0c39a8d929128304eb77047985ccd344a
13-
git+https://github.com/huridocs/ml-cloud-connector@d62cb04509a33b54d02b2004255abb097d9a4614
12+
git+https://github.com/huridocs/ml-cloud-connector@6f8dd2aaa890b9cebbab3254dddec65b6bbce95f
1413

src/tests/test_app.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ def test_info(self):
1515

1616
self.assertEqual(200, response.status_code)
1717

18-
@unittest.skip("This test requires a running cloud service")
18+
# @unittest.skip("This test requires a running cloud service")
1919
def test_cloud(self):
2020
test_file_path = Path(APP_PATH, "tests", "test_files", "test.pdf")
2121
with open(test_file_path, "rb") as stream:

src/use_cases/extract_segments_use_case.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -88,14 +88,14 @@ def extract_segments_cloud(pdf_file: PdfFile, task: Task, xml_file_name: str = "
8888
files=files,
8989
data={"fast": "False"},
9090
)
91-
results, success, error = execute_on_cloud_use_case.execute(rest_call)
91+
response, success, error = execute_on_cloud_use_case.execute(rest_call)
9292
if not success:
9393
return False, None
9494

95-
if not save_cloud_xml_file(cloud_provider, xml_file_name):
95+
if not save_cloud_xml_file(execute_on_cloud_use_case, xml_file_name):
9696
return False, None
9797

98-
segments: list[SegmentBox] = [SegmentBox(**segment_box) for segment_box in results]
98+
segments: list[SegmentBox] = [SegmentBox(**segment_box) for segment_box in response.json()]
9999

100100
return True, ExtractionData(
101101
tenant=task.tenant,
@@ -106,9 +106,18 @@ def extract_segments_cloud(pdf_file: PdfFile, task: Task, xml_file_name: str = "
106106
)
107107

108108

109-
def save_cloud_xml_file(cloud_provider: CloudProviderRepository, xml_file_name: str) -> bool:
109+
def save_cloud_xml_file(execute_on_cloud_use_case: ExecuteOnCloudUseCase, xml_file_name: str) -> bool:
110110
try:
111-
response = requests.get(f"http://{cloud_provider.get_ip()}:{DOCUMENT_LAYOUT_ANALYSIS_PORT}/get_xml/{xml_file_name}")
111+
rest_call = RestCall(
112+
port=DOCUMENT_LAYOUT_ANALYSIS_PORT,
113+
endpoint=["get_xml", xml_file_name],
114+
method="GET",
115+
)
116+
response, success, error = execute_on_cloud_use_case.execute(rest_call)
117+
118+
if not success:
119+
return False
120+
112121
xml_file_path = Path(DATA_PATH, xml_file_name)
113122
xml_file_path.write_bytes(response.content)
114123
return True

0 commit comments

Comments
 (0)