From bafcc008bc0137b1f548a73523e89ae57f1f78a3 Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 3 Jan 2025 13:27:00 -0300 Subject: [PATCH 1/4] [SDK] fixed none and undefined on response --- apps/js-sdk/firecrawl/package.json | 2 +- apps/js-sdk/firecrawl/src/index.ts | 46 ++++++++++++++++++++------ apps/python-sdk/firecrawl/__init__.py | 2 +- apps/python-sdk/firecrawl/firecrawl.py | 38 +++++++++++++++------ 4 files changed, 65 insertions(+), 23 deletions(-) diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index 9aab848ae..46f853086 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "1.11.0", + "version": "1.11.1", "description": "JavaScript SDK for Firecrawl API", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index af9dbc756..6b89960ed 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -571,17 +571,30 @@ export default class FirecrawlApp { allData = data; } } - return ({ + + let resp: CrawlStatusResponse | ErrorResponse = { success: response.data.success, status: response.data.status, total: response.data.total, completed: response.data.completed, creditsUsed: response.data.creditsUsed, expiresAt: new Date(response.data.expiresAt), - next: response.data.next, - data: allData, - error: response.data.error, - }) + data: allData + } + + if (!response.data.success && response.data.error) { + resp = { + ...resp, + success: false, + error: response.data.error + } as ErrorResponse; + } + + if (response.data.next) { + (resp as CrawlStatusResponse).next = response.data.next; + } + + return resp; } else { this.handleError(response, "check crawl status"); } @@ -805,17 +818,30 @@ export default class FirecrawlApp { allData = data; } } - return ({ + + let resp: BatchScrapeStatusResponse | ErrorResponse = { success: response.data.success, status: response.data.status, total: response.data.total, completed: response.data.completed, creditsUsed: response.data.creditsUsed, expiresAt: new Date(response.data.expiresAt), - next: response.data.next, - data: allData, - error: response.data.error, - }) + data: allData + } + + if (!response.data.success && response.data.error) { + resp = { + ...resp, + success: false, + error: response.data.error + } as ErrorResponse; + } + + if (response.data.next) { + (resp as BatchScrapeStatusResponse).next = response.data.next; + } + + return resp; } else { this.handleError(response, "check batch scrape status"); } diff --git a/apps/python-sdk/firecrawl/__init__.py b/apps/python-sdk/firecrawl/__init__.py index d4d246e9e..5528b3b2a 100644 --- a/apps/python-sdk/firecrawl/__init__.py +++ b/apps/python-sdk/firecrawl/__init__.py @@ -13,7 +13,7 @@ from .firecrawl import FirecrawlApp # noqa -__version__ = "1.8.0" +__version__ = "1.8.1" # Define the logger for the Firecrawl project logger: logging.Logger = logging.getLogger("firecrawl") diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 271a13f06..8eb7acee9 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -266,17 +266,25 @@ def check_crawl_status(self, id: str) -> Any: logger.error(f"Error during pagination request: {e}") break status_data['data'] = data - - return { - 'success': True, + + response = { 'status': status_data.get('status'), 'total': status_data.get('total'), 'completed': status_data.get('completed'), 'creditsUsed': status_data.get('creditsUsed'), 'expiresAt': status_data.get('expiresAt'), - 'data': status_data.get('data'), - 'error': status_data.get('error'), - 'next': status_data.get('next', None) + 'data': status_data.get('data') + } + + if 'error' in status_data: + response['error'] = status_data['error'] + + if 'next' in status_data: + response['next'] = status_data['next'] + + return { + 'success': False if 'error' in status_data else True, + **response } else: self._handle_error(response, 'check crawl status') @@ -476,16 +484,24 @@ def check_batch_scrape_status(self, id: str) -> Any: break status_data['data'] = data - return { - 'success': True, + response = { 'status': status_data.get('status'), 'total': status_data.get('total'), 'completed': status_data.get('completed'), 'creditsUsed': status_data.get('creditsUsed'), 'expiresAt': status_data.get('expiresAt'), - 'data': status_data.get('data'), - 'error': status_data.get('error'), - 'next': status_data.get('next', None) + 'data': status_data.get('data') + } + + if 'error' in status_data: + response['error'] = status_data['error'] + + if 'next' in status_data: + response['next'] = status_data['next'] + + return { + 'success': False if 'error' in status_data else True, + **response } else: self._handle_error(response, 'check batch scrape status') From 55dad5ea13da577e86122fb832b8534627d1f03c Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 3 Jan 2025 13:56:39 -0300 Subject: [PATCH 2/4] fixed empty data with next causing infinite loop --- apps/js-sdk/firecrawl/src/index.ts | 12 ++++++++++++ apps/python-sdk/firecrawl/firecrawl.py | 9 +++++++++ 2 files changed, 21 insertions(+) diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 6b89960ed..687325d35 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -565,6 +565,10 @@ export default class FirecrawlApp { if ("data" in statusData) { let data = statusData.data; while (typeof statusData === 'object' && 'next' in statusData) { + if (data.length === 0) { + console.warn("Expected 'data' is missing.") + break + } statusData = (await this.getRequest(statusData.next, headers)).data; data = data.concat(statusData.data); } @@ -812,6 +816,10 @@ export default class FirecrawlApp { if ("data" in statusData) { let data = statusData.data; while (typeof statusData === 'object' && 'next' in statusData) { + if (data.length === 0) { + console.warn("Expected 'data' is missing.") + break + } statusData = (await this.getRequest(statusData.next, headers)).data; data = data.concat(statusData.data); } @@ -995,6 +1003,10 @@ export default class FirecrawlApp { if ("data" in statusData) { let data = statusData.data; while (typeof statusData === 'object' && 'next' in statusData) { + if (data.length === 0) { + console.warn("Expected 'data' is missing.") + break + } statusResponse = await this.getRequest(statusData.next, headers); statusData = statusResponse.data; data = data.concat(statusData.data); diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 8eb7acee9..812f7bd1f 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -250,6 +250,9 @@ def check_crawl_status(self, id: str) -> Any: if 'data' in status_data: data = status_data['data'] while 'next' in status_data: + if len(status_data['data']) == 0: + logger.warning("Expected 'data' is missing.") + break next_url = status_data.get('next') if not next_url: logger.warning("Expected 'next' URL is missing.") @@ -467,6 +470,9 @@ def check_batch_scrape_status(self, id: str) -> Any: if 'data' in status_data: data = status_data['data'] while 'next' in status_data: + if len(status_data['data']) == 0: + logger.warning("Expected 'data' is missing.") + break next_url = status_data.get('next') if not next_url: logger.warning("Expected 'next' URL is missing.") @@ -685,6 +691,9 @@ def _monitor_job_status(self, id: str, headers: Dict[str, str], poll_interval: i if 'data' in status_data: data = status_data['data'] while 'next' in status_data: + if len(status_data['data']) == 0: + logger.warning("Expected 'data' is missing.") + break status_response = self._get_request(status_data['next'], headers) status_data = status_response.json() data.extend(status_data.get('data', [])) From 12cd9f083ca5658519dbf2296c4711cc47407fd1 Mon Sep 17 00:00:00 2001 From: Rafael Miller <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 3 Jan 2025 17:12:30 -0300 Subject: [PATCH 3/4] removed warnings --- apps/js-sdk/firecrawl/src/index.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index a30387784..474eea830 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -566,7 +566,6 @@ export default class FirecrawlApp { let data = statusData.data; while (typeof statusData === 'object' && 'next' in statusData) { if (data.length === 0) { - console.warn("Expected 'data' is missing.") break } statusData = (await this.getRequest(statusData.next, headers)).data; @@ -817,7 +816,6 @@ export default class FirecrawlApp { let data = statusData.data; while (typeof statusData === 'object' && 'next' in statusData) { if (data.length === 0) { - console.warn("Expected 'data' is missing.") break } statusData = (await this.getRequest(statusData.next, headers)).data; @@ -1006,7 +1004,6 @@ export default class FirecrawlApp { let data = statusData.data; while (typeof statusData === 'object' && 'next' in statusData) { if (data.length === 0) { - console.warn("Expected 'data' is missing.") break } statusResponse = await this.getRequest(statusData.next, headers); From a54a5dbb4510c641b111106a963874a45c441511 Mon Sep 17 00:00:00 2001 From: Rafael Miller <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 3 Jan 2025 17:13:34 -0300 Subject: [PATCH 4/4] removed warnings --- apps/python-sdk/firecrawl/firecrawl.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 812f7bd1f..d32164052 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -251,7 +251,6 @@ def check_crawl_status(self, id: str) -> Any: data = status_data['data'] while 'next' in status_data: if len(status_data['data']) == 0: - logger.warning("Expected 'data' is missing.") break next_url = status_data.get('next') if not next_url: @@ -471,7 +470,6 @@ def check_batch_scrape_status(self, id: str) -> Any: data = status_data['data'] while 'next' in status_data: if len(status_data['data']) == 0: - logger.warning("Expected 'data' is missing.") break next_url = status_data.get('next') if not next_url: @@ -692,7 +690,6 @@ def _monitor_job_status(self, id: str, headers: Dict[str, str], poll_interval: i data = status_data['data'] while 'next' in status_data: if len(status_data['data']) == 0: - logger.warning("Expected 'data' is missing.") break status_response = self._get_request(status_data['next'], headers) status_data = status_response.json()