fix(python-sdk): add JSON parse error reporting clarity
mogery committed Jan 17, 2025
1 parent d5929af commit 805bfa4
Showing 1 changed file with 82 additions and 24 deletions.
106 changes: 82 additions & 24 deletions apps/python-sdk/firecrawl/firecrawl.py
@@ -120,7 +120,10 @@ def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any:
             json=scrape_params,
         )
         if response.status_code == 200:
-            response = response.json()
+            try:
+                response = response.json()
+            except:
+                raise Exception(f'Failed to parse Firecrawl response as JSON.')
             if response['success'] and 'data' in response:
                 return response['data']
             elif "error" in response:
@@ -159,7 +162,10 @@ def search(self, query: str, params: Optional[Union[Dict[str, Any], SearchParams
         if response.status_code != 200:
             raise Exception(f"Request failed with status code {response.status_code}")

-        return response.json()
+        try:
+            return response.json()
+        except:
+            raise Exception(f'Failed to parse Firecrawl response as JSON.')

     def crawl_url(self, url: str,
                   params: Optional[Dict[str, Any]] = None,
@@ -194,7 +200,10 @@ def crawl_url(self, url: str,
             json_data.update(params)
         response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
         if response.status_code == 200:
-            id = response.json().get('id')
+            try:
+                id = response.json().get('id')
+            except:
+                raise Exception(f'Failed to parse Firecrawl response as JSON.')
             return self._monitor_job_status(id, headers, poll_interval)

         else:
@@ -223,7 +232,10 @@ def async_crawl_url(self, url: str, params: Optional[Dict[str, Any]] = None, ide
             json_data.update(params)
         response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
         if response.status_code == 200:
-            return response.json()
+            try:
+                return response.json()
+            except:
+                raise Exception(f'Failed to parse Firecrawl response as JSON.')
         else:
             self._handle_error(response, 'start crawl job')

@@ -245,7 +257,10 @@ def check_crawl_status(self, id: str) -> Any:
         headers = self._prepare_headers()
         response = self._get_request(f'{self.api_url}{endpoint}', headers)
         if response.status_code == 200:
-            status_data = response.json()
+            try:
+                status_data = response.json()
+            except:
+                raise Exception(f'Failed to parse Firecrawl response as JSON.')
             if status_data['status'] == 'completed':
                 if 'data' in status_data:
                     data = status_data['data']
@@ -261,7 +276,10 @@ def check_crawl_status(self, id: str) -> Any:
                             if status_response.status_code != 200:
                                 logger.error(f"Failed to fetch next page: {status_response.status_code}")
                                 break
-                            next_data = status_response.json()
+                            try:
+                                next_data = status_response.json()
+                            except:
+                                raise Exception(f'Failed to parse Firecrawl response as JSON.')
                             data.extend(next_data.get('data', []))
                             status_data = next_data
                         except Exception as e:
@@ -304,7 +322,10 @@ def cancel_crawl(self, id: str) -> Dict[str, Any]:
         headers = self._prepare_headers()
         response = self._delete_request(f'{self.api_url}/v1/crawl/{id}', headers)
         if response.status_code == 200:
-            return response.json()
+            try:
+                return response.json()
+            except:
+                raise Exception(f'Failed to parse Firecrawl response as JSON.')
         else:
             self._handle_error(response, "cancel crawl job")

@@ -352,7 +373,10 @@ def map_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any:
             json=json_data,
         )
         if response.status_code == 200:
-            response = response.json()
+            try:
+                response = response.json()
+            except:
+                raise Exception(f'Failed to parse Firecrawl response as JSON.')
             if response['success'] and 'links' in response:
                 return response
             elif 'error' in response:
@@ -395,7 +419,10 @@ def batch_scrape_urls(self, urls: list[str],
             json_data.update(params)
         response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
         if response.status_code == 200:
-            id = response.json().get('id')
+            try:
+                id = response.json().get('id')
+            except:
+                raise Exception(f'Failed to parse Firecrawl response as JSON.')
             return self._monitor_job_status(id, headers, poll_interval)

         else:
@@ -424,7 +451,10 @@ def async_batch_scrape_urls(self, urls: list[str], params: Optional[Dict[str, An
             json_data.update(params)
         response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
         if response.status_code == 200:
-            return response.json()
+            try:
+                return response.json()
+            except:
+                raise Exception(f'Failed to parse Firecrawl response as JSON.')
         else:
             self._handle_error(response, 'start batch scrape job')

@@ -464,7 +494,10 @@ def check_batch_scrape_status(self, id: str) -> Any:
         headers = self._prepare_headers()
         response = self._get_request(f'{self.api_url}{endpoint}', headers)
         if response.status_code == 200:
-            status_data = response.json()
+            try:
+                status_data = response.json()
+            except:
+                raise Exception(f'Failed to parse Firecrawl response as JSON.')
             if status_data['status'] == 'completed':
                 if 'data' in status_data:
                     data = status_data['data']
@@ -480,7 +513,10 @@ def check_batch_scrape_status(self, id: str) -> Any:
                             if status_response.status_code != 200:
                                 logger.error(f"Failed to fetch next page: {status_response.status_code}")
                                 break
-                            next_data = status_response.json()
+                            try:
+                                next_data = status_response.json()
+                            except:
+                                raise Exception(f'Failed to parse Firecrawl response as JSON.')
                             data.extend(next_data.get('data', []))
                             status_data = next_data
                         except Exception as e:
@@ -550,7 +586,10 @@ def extract(self, urls: List[str], params: Optional[ExtractParams] = None) -> An
                 headers
             )
             if response.status_code == 200:
-                data = response.json()
+                try:
+                    data = response.json()
+                except:
+                    raise Exception(f'Failed to parse Firecrawl response as JSON.')
                 if data['success']:
                     job_id = data.get('id')
                     if not job_id:
@@ -563,7 +602,10 @@ def extract(self, urls: List[str], params: Optional[ExtractParams] = None) -> An
                             headers
                         )
                         if status_response.status_code == 200:
-                            status_data = status_response.json()
+                            try:
+                                status_data = status_response.json()
+                            except:
+                                raise Exception(f'Failed to parse Firecrawl response as JSON.')
                             if status_data['status'] == 'completed':
                                 if status_data['success']:
                                     return status_data
@@ -601,7 +643,10 @@ def get_extract_status(self, job_id: str) -> Dict[str, Any]:
         try:
             response = self._get_request(f'{self.api_url}/v1/extract/{job_id}', headers)
             if response.status_code == 200:
-                return response.json()
+                try:
+                    return response.json()
+                except:
+                    raise Exception(f'Failed to parse Firecrawl response as JSON.')
             else:
                 self._handle_error(response, "get extract status")
         except Exception as e:
@@ -641,7 +686,10 @@ def async_extract(self, urls: List[str], params: Optional[Dict[str, Any]] = None
         try:
             response = self._post_request(f'{self.api_url}/v1/extract', request_data, headers)
             if response.status_code == 200:
-                return response.json()
+                try:
+                    return response.json()
+                except:
+                    raise Exception(f'Failed to parse Firecrawl response as JSON.')
             else:
                 self._handle_error(response, "async extract")
         except Exception as e:
@@ -771,16 +819,22 @@ def _monitor_job_status(self, id: str, headers: Dict[str, str], poll_interval: i

             status_response = self._get_request(api_url, headers)
             if status_response.status_code == 200:
-                status_data = status_response.json()
+                try:
+                    status_data = status_response.json()
+                except:
+                    raise Exception(f'Failed to parse Firecrawl response as JSON.')
                 if status_data['status'] == 'completed':
                     if 'data' in status_data:
                         data = status_data['data']
                         while 'next' in status_data:
-                            if len(status_data['data']) == 0:
-                                break
-                            status_response = self._get_request(status_data['next'], headers)
-                            status_data = status_response.json()
-                            data.extend(status_data.get('data', []))
+                            if len(status_data['data']) == 0:
+                                break
+                            status_response = self._get_request(status_data['next'], headers)
+                            try:
+                                status_data = status_response.json()
+                            except:
+                                raise Exception(f'Failed to parse Firecrawl response as JSON.')
+                            data.extend(status_data.get('data', []))
                         status_data['data'] = data
                         return status_data
                     else:
@@ -804,8 +858,12 @@ def _handle_error(self, response: requests.Response, action: str) -> None:
         Raises:
             Exception: An exception with a message containing the status code and error details from the response.
         """
-        error_message = response.json().get('error', 'No error message provided.')
-        error_details = response.json().get('details', 'No additional error details provided.')
+        try:
+            error_message = response.json().get('error', 'No error message provided.')
+            error_details = response.json().get('details', 'No additional error details provided.')
+        except:
+            raise requests.exceptions.HTTPError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status_code}', response=response)
+

         if response.status_code == 402:
             message = f"Payment Required: Failed to {action}. {error_message} - {error_details}"
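For SDK users, the practical effect of this commit is that a non-JSON response body (for example, an HTML error page returned by a proxy) now surfaces as a descriptive exception rather than an unhandled JSON decode error. A minimal caller-side sketch, assuming the published firecrawl-py package, a placeholder API key, and an illustrative target URL:

    from firecrawl import FirecrawlApp

    app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

    try:
        data = app.scrape_url("https://example.com")
    except Exception as e:
        # With this change, a malformed response raises
        # "Failed to parse Firecrawl response as JSON." (or, in _handle_error,
        # an HTTPError noting the status code) instead of a raw decode error.
        print(f"Scrape failed: {e}")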
