diff --git a/src/parxy_core/drivers/abstract_driver.py b/src/parxy_core/drivers/abstract_driver.py index 610af36..38180c6 100644 --- a/src/parxy_core/drivers/abstract_driver.py +++ b/src/parxy_core/drivers/abstract_driver.py @@ -14,6 +14,9 @@ FileNotFoundException, ParsingException, AuthenticationException, + RateLimitException, + QuotaExceededException, + InputValidationException, ) from parxy_core.models.config import BaseConfig from parxy_core.logging import create_null_logger @@ -137,9 +140,7 @@ def parse( except Exception as ex: self._logger.error( - 'Error while parsing file', - file, - self.__class__.__name__, + f'Error while parsing file {file if isinstance(file, str) else "stream"} using {self.__class__.__name__}', exc_info=True, ) @@ -154,7 +155,14 @@ def parse( parxy_exc = FileNotFoundException(ex, self.__class__) elif isinstance( ex, - (FileNotFoundException, AuthenticationException, ParsingException), + ( + FileNotFoundException, + AuthenticationException, + ParsingException, + RateLimitException, + QuotaExceededException, + InputValidationException, + ), ): parxy_exc = ex else: diff --git a/src/parxy_core/drivers/landingai.py b/src/parxy_core/drivers/landingai.py index bc624ac..e7d3bab 100644 --- a/src/parxy_core/drivers/landingai.py +++ b/src/parxy_core/drivers/landingai.py @@ -3,6 +3,9 @@ from typing import TYPE_CHECKING from parxy_core.exceptions.authentication_exception import AuthenticationException +from parxy_core.exceptions.rate_limit_exception import RateLimitException +from parxy_core.exceptions.quota_exceeded_exception import QuotaExceededException +from parxy_core.exceptions.input_validation_exception import InputValidationException from parxy_core.tracing.utils import trace_with_output # Type hints that will be available at runtime when unstructured is installed @@ -77,7 +80,7 @@ def _handle( level: str = 'page', **kwargs, ) -> Document: - from landingai_ade import AuthenticationError + from landingai_ade import AuthenticationError, 
RateLimitError, APIStatusError try: filename, stream = self.handle_file_input(file) @@ -90,8 +93,25 @@ def _handle( except AuthenticationError as aex: raise AuthenticationException( message=str(aex), - service=self.__class__, + service=self.__class__.__name__, ) from aex + except RateLimitError as rlex: + raise RateLimitException( + message=str(rlex), + service=self.__class__.__name__, + ) from rlex + except APIStatusError as ase: + status_code_exceptions = { + 429: RateLimitException, + 402: QuotaExceededException, + 422: InputValidationException, + } + if exc_class := status_code_exceptions.get(ase.status_code): + raise exc_class( + message=str(ase), + service=self.__class__.__name__, + ) from ase + raise doc = landingaiade_to_parxy(parse_response) diff --git a/src/parxy_core/drivers/llmwhisperer.py b/src/parxy_core/drivers/llmwhisperer.py index fed5d41..1bf592d 100644 --- a/src/parxy_core/drivers/llmwhisperer.py +++ b/src/parxy_core/drivers/llmwhisperer.py @@ -21,6 +21,9 @@ FileNotFoundException, ParsingException, AuthenticationException, + RateLimitException, + QuotaExceededException, + InputValidationException, ) from parxy_core.models import Document, Page from parxy_core.utils import safe_json_dumps @@ -150,19 +153,36 @@ def _handle( except FileNotFoundError as fex: raise FileNotFoundException(fex, self.SERVICE_NAME) from fex except LLMWhispererClientException as wex: - if wex.value['status_code'] in (401, 403): + status_code = wex.value.get('status_code') + error_message = ( + str(wex.error_message()) if callable(wex.error_message) else str(wex) + ) + + if status_code in (401, 403): raise AuthenticationException( - message=str(wex.error_message()), + message=error_message, service=self.SERVICE_NAME, details=wex.value, - ) # from wex - else: - raise ParsingException( - wex.error_message if hasattr(wex, 'error_message') else str(wex), - self.SERVICE_NAME, + ) from wex + + status_code_exceptions = { + 429: RateLimitException, + 402: QuotaExceededException, + 
from typing import Optional


class InputValidationException(Exception):
    """Exception raised when input fails service validation constraints.

    This exception should be raised when a service returns a 422 status code
    or indicates that the input doesn't meet requirements such as page limits,
    file size limits, or other validation constraints.

    Attributes
    ----------
    message : str
        Explanation of the validation error
    service : str
        Name of the service where validation failed (e.g., 'LlamaParse', 'LandingAI')
    details : dict
        Additional details about the error, such as constraints or limits.
        Always a dict; empty when no details were supplied.

    Examples
    --------
    >>> err = InputValidationException(
    ...     message='PDF must not exceed 100 pages',
    ...     service='LandingAI',
    ...     details={'max_pages': 100, 'actual_pages': 150},
    ... )
    >>> print(err)
    Input validation failed for LandingAI: PDF must not exceed 100 pages
    Details: {'max_pages': 100, 'actual_pages': 150}
    """

    def __init__(
        self,
        message: str,
        service: str,
        details: Optional[dict] = None,
    ):
        """Initialize the input validation error.

        Parameters
        ----------
        message : str
            Human-readable error message
        service : str
            Name of the service where validation failed
        details : dict, optional
            Additional error details, by default None
        """
        self.message = message
        self.service = service
        self.details = details or {}
        # Only the message goes into ``args`` so ``e.args == (message,)``.
        super().__init__(self.message)

    def __reduce__(self):
        """Return the constructor call used by ``pickle`` and ``copy``.

        The default ``BaseException`` reduction replays only ``self.args``
        (the message alone), which fails because ``service`` is a required
        constructor argument. Replaying every argument keeps the exception
        transferable across process boundaries.
        """
        return (self.__class__, (self.message, self.service, self.details))

    def __str__(self) -> str:
        """Return a string representation of the error.

        Returns
        -------
        str
            Formatted error message including the service name, followed by
            a ``Details:`` line when details are present
        """
        base_message = f'Input validation failed for {self.service}: {self.message}'
        if self.details:
            return f'{base_message}\nDetails: {self.details}'
        return base_message


class QuotaExceededException(Exception):
    """Exception raised when account quota or balance is insufficient.

    This exception should be raised when a service returns a 402 status code
    or indicates that the account balance, credits, or quota has been exhausted.

    Attributes
    ----------
    message : str
        Explanation of the quota error
    service : str
        Name of the service where quota was exceeded (e.g., 'LlamaParse', 'LandingAI')
    details : dict
        Additional details about the error, such as response data or
        remaining quota. Always a dict; empty when no details were supplied.

    Examples
    --------
    >>> err = QuotaExceededException(
    ...     message='User balance is insufficient',
    ...     service='LandingAI',
    ...     details={'error_code': 402},
    ... )
    >>> print(err)
    Quota exceeded for LandingAI: User balance is insufficient
    Details: {'error_code': 402}
    """

    def __init__(
        self,
        message: str,
        service: str,
        details: Optional[dict] = None,
    ):
        """Initialize the quota exceeded error.

        Parameters
        ----------
        message : str
            Human-readable error message
        service : str
            Name of the service where quota was exceeded
        details : dict, optional
            Additional error details, by default None
        """
        self.message = message
        self.service = service
        self.details = details or {}
        # Only the message goes into ``args`` so ``e.args == (message,)``.
        super().__init__(self.message)

    def __reduce__(self):
        """Return the constructor call used by ``pickle`` and ``copy``.

        The default ``BaseException`` reduction replays only ``self.args``
        (the message alone), which fails because ``service`` is a required
        constructor argument.
        """
        return (self.__class__, (self.message, self.service, self.details))

    def __str__(self) -> str:
        """Return a string representation of the error.

        Returns
        -------
        str
            Formatted error message including the service name, followed by
            a ``Details:`` line when details are present
        """
        base_message = f'Quota exceeded for {self.service}: {self.message}'
        if self.details:
            return f'{base_message}\nDetails: {self.details}'
        return base_message


class RateLimitException(Exception):
    """Exception raised when API rate limits are exceeded.

    This exception should be raised when a service returns a 429 status code
    or indicates that the request rate or quota has been exceeded.

    Attributes
    ----------
    message : str
        Explanation of the rate limit error
    service : str
        Name of the service where rate limit was hit (e.g., 'LlamaParse', 'LandingAI')
    retry_after : int or None
        Number of seconds to wait before retrying, if provided by the service
    details : dict
        Additional details about the error, such as response data or error
        codes. Always a dict; empty when no details were supplied.

    Examples
    --------
    >>> err = RateLimitException(
    ...     message='Too many requests',
    ...     service='LandingAI',
    ...     retry_after=60,
    ...     details={'error_code': 429},
    ... )
    >>> print(err)
    Rate limit exceeded for LandingAI: Too many requests (retry after 60s)
    Details: {'error_code': 429}
    >>> err.retry_after
    60
    """

    def __init__(
        self,
        message: str,
        service: str,
        retry_after: Optional[int] = None,
        details: Optional[dict] = None,
    ):
        """Initialize the rate limit error.

        Parameters
        ----------
        message : str
            Human-readable error message
        service : str
            Name of the service where rate limit was hit
        retry_after : int, optional
            Seconds to wait before retrying, by default None
        details : dict, optional
            Additional error details, by default None
        """
        self.message = message
        self.service = service
        self.retry_after = retry_after
        self.details = details or {}
        # Only the message goes into ``args`` so ``e.args == (message,)``.
        super().__init__(self.message)

    def __reduce__(self):
        """Return the constructor call used by ``pickle`` and ``copy``.

        The default ``BaseException`` reduction replays only ``self.args``
        (the message alone), which fails because ``service`` is a required
        constructor argument.
        """
        return (
            self.__class__,
            (self.message, self.service, self.retry_after, self.details),
        )

    def __str__(self) -> str:
        """Return a string representation of the error.

        Returns
        -------
        str
            Formatted error message including service name and retry info
        """
        base_message = f'Rate limit exceeded for {self.service}: {self.message}'
        # Compare against None: a retry delay of 0 seconds is a real value
        # and must not be dropped by a truthiness test.
        if self.retry_after is not None:
            base_message = f'{base_message} (retry after {self.retry_after}s)'
        if self.details:
            return f'{base_message}\nDetails: {self.details}'
        return base_message
@patch('parxy_core.drivers.landingai.LandingAIADEDriver.handle_file_input')
@patch('landingai_ade.LandingAIADE')
@patch('parxy_core.drivers.abstract_driver.tracer')
def test_landingai_driver_handles_rate_limit_error(
    self, mock_tracer, mock_client_class, mock_handle_file
):
    """A RateLimitError raised by the client surfaces as RateLimitException."""
    from httpx import Request, Response
    from landingai_ade import RateLimitError

    # File input is stubbed so nothing is read from disk.
    mock_handle_file.return_value = ('test.pdf', b'fake content')

    # Tracing collaborators: the span is a context manager whose __exit__
    # returns False, so exceptions propagate through it.
    span = MagicMock()
    span.__enter__ = Mock(return_value=span)
    span.__exit__ = Mock(return_value=False)
    mock_tracer.span = Mock(return_value=span)
    mock_tracer.count = Mock()
    mock_tracer.error = Mock()

    # The client fails every parse call with a rate-limit error.
    http_response = Response(
        status_code=429,
        request=Request('POST', 'https://api.landing.ai/parse'),
    )
    client = MagicMock()
    client.parse.side_effect = RateLimitError(
        'Rate limit exceeded',
        response=http_response,
        body={'error': 'Rate limit exceeded'},
    )
    mock_client_class.return_value = client

    driver = LandingAIADEDriver(LandingAIConfig())
    with pytest.raises(RateLimitException) as raised:
        driver.parse('test.pdf')

    error = raised.value
    assert 'Rate limit' in str(error)
    assert error.service == 'LandingAIADEDriver'


@patch('parxy_core.drivers.landingai.LandingAIADEDriver.handle_file_input')
@patch('landingai_ade.LandingAIADE')
@patch('parxy_core.drivers.abstract_driver.tracer')
def test_landingai_driver_handles_api_status_error_429(
    self, mock_tracer, mock_client_class, mock_handle_file
):
    """An APIStatusError carrying status 429 surfaces as RateLimitException."""
    from httpx import Request, Response
    from landingai_ade import APIStatusError

    # File input is stubbed so nothing is read from disk.
    mock_handle_file.return_value = ('test.pdf', b'fake content')

    # Tracing collaborators: exceptions propagate through the span.
    span = MagicMock()
    span.__enter__ = Mock(return_value=span)
    span.__exit__ = Mock(return_value=False)
    mock_tracer.span = Mock(return_value=span)
    mock_tracer.count = Mock()
    mock_tracer.error = Mock()

    # The client fails every parse call with a generic status error (429).
    http_response = Response(
        status_code=429,
        request=Request('POST', 'https://api.landing.ai/parse'),
    )
    client = MagicMock()
    client.parse.side_effect = APIStatusError(
        "Error code: 429 - {'error': 'Rate limit exceeded'}",
        response=http_response,
        body={'error': 'Rate limit exceeded'},
    )
    mock_client_class.return_value = client

    driver = LandingAIADEDriver(LandingAIConfig())
    with pytest.raises(RateLimitException) as raised:
        driver.parse('test.pdf')

    error = raised.value
    assert '429' in str(error)
    assert error.service == 'LandingAIADEDriver'


@patch('parxy_core.drivers.landingai.LandingAIADEDriver.handle_file_input')
@patch('landingai_ade.LandingAIADE')
@patch('parxy_core.drivers.abstract_driver.tracer')
def test_landingai_driver_handles_api_status_error_402(
    self, mock_tracer, mock_client_class, mock_handle_file
):
    """An APIStatusError carrying status 402 surfaces as QuotaExceededException."""
    from httpx import Request, Response
    from landingai_ade import APIStatusError

    # File input is stubbed so nothing is read from disk.
    mock_handle_file.return_value = ('test.pdf', b'fake content')

    # Tracing collaborators: exceptions propagate through the span.
    span = MagicMock()
    span.__enter__ = Mock(return_value=span)
    span.__exit__ = Mock(return_value=False)
    mock_tracer.span = Mock(return_value=span)
    mock_tracer.count = Mock()
    mock_tracer.error = Mock()

    # The client fails every parse call with a generic status error (402).
    http_response = Response(
        status_code=402,
        request=Request('POST', 'https://api.landing.ai/parse'),
    )
    client = MagicMock()
    client.parse.side_effect = APIStatusError(
        "Error code: 402 - {'error': 'Payment Required. User balance is insufficient (including pending jobs).'}",
        response=http_response,
        body={
            'error': 'Payment Required. User balance is insufficient (including pending jobs).'
        },
    )
    mock_client_class.return_value = client

    driver = LandingAIADEDriver(LandingAIConfig())
    with pytest.raises(QuotaExceededException) as raised:
        driver.parse('test.pdf')

    error = raised.value
    assert '402' in str(error)
    assert error.service == 'LandingAIADEDriver'


@patch('parxy_core.drivers.landingai.LandingAIADEDriver.handle_file_input')
@patch('landingai_ade.LandingAIADE')
@patch('parxy_core.drivers.abstract_driver.tracer')
def test_landingai_driver_handles_api_status_error_422(
    self, mock_tracer, mock_client_class, mock_handle_file
):
    """An APIStatusError carrying status 422 surfaces as InputValidationException."""
    from httpx import Request, Response
    from landingai_ade import APIStatusError

    # File input is stubbed so nothing is read from disk.
    mock_handle_file.return_value = ('test.pdf', b'fake content')

    # Tracing collaborators: exceptions propagate through the span.
    span = MagicMock()
    span.__enter__ = Mock(return_value=span)
    span.__exit__ = Mock(return_value=False)
    mock_tracer.span = Mock(return_value=span)
    mock_tracer.count = Mock()
    mock_tracer.error = Mock()

    # The client fails every parse call with a generic status error (422).
    http_response = Response(
        status_code=422,
        request=Request('POST', 'https://api.landing.ai/parse'),
    )
    client = MagicMock()
    client.parse.side_effect = APIStatusError(
        "Error code: 422 - {'error': 'PDF must not exceed 100 pages.'}",
        response=http_response,
        body={'error': 'PDF must not exceed 100 pages.'},
    )
    mock_client_class.return_value = client

    driver = LandingAIADEDriver(LandingAIConfig())
    with pytest.raises(InputValidationException) as raised:
        driver.parse('test.pdf')

    error = raised.value
    assert '422' in str(error)
    assert 'PDF must not exceed 100 pages' in str(error)
    assert error.service == 'LandingAIADEDriver'
@patch('unstract.llmwhisperer.LLMWhispererClientV2')
@patch('parxy_core.drivers.abstract_driver.tracer')
def test_llmwhisperer_driver_handles_quota_exceeded_402(
    self, mock_tracer, mock_client_class
):
    """A client exception with status 402 surfaces as QuotaExceededException."""
    from unstract.llmwhisperer.client_v2 import LLMWhispererClientException

    # The client fails every whisper call with a 402 payload.
    payload = {
        'message': 'Extraction failed since you breached your free processing limit of 100 pages max per day.',
        'status_code': 402,
        'extraction': {},
    }
    client = MagicMock()
    client.whisper.side_effect = LLMWhispererClientException(payload)
    mock_client_class.return_value = client

    # Tracing collaborators: the span is a context manager whose __exit__
    # returns False, so exceptions propagate through it.
    span = MagicMock()
    span.__enter__ = Mock(return_value=span)
    span.__exit__ = Mock(return_value=False)
    mock_tracer.span = Mock(return_value=span)
    mock_tracer.count = Mock()
    mock_tracer.error = Mock()

    driver = LlmWhispererDriver(LlmWhispererConfig())
    with pytest.raises(QuotaExceededException) as raised:
        driver.parse(b'%PDF-1.4 test content')

    error = raised.value
    assert 'breached your free processing limit' in str(error)
    assert error.service == 'llmwhisperer'
    assert error.details['status_code'] == 402


@patch('unstract.llmwhisperer.LLMWhispererClientV2')
@patch('parxy_core.drivers.abstract_driver.tracer')
def test_llmwhisperer_driver_handles_rate_limit_429(
    self, mock_tracer, mock_client_class
):
    """A client exception with status 429 surfaces as RateLimitException."""
    from unstract.llmwhisperer.client_v2 import LLMWhispererClientException

    # The client fails every whisper call with a 429 payload.
    payload = {
        'message': 'Rate limit exceeded. Please try again later.',
        'status_code': 429,
        'extraction': {},
    }
    client = MagicMock()
    client.whisper.side_effect = LLMWhispererClientException(payload)
    mock_client_class.return_value = client

    # Tracing collaborators: exceptions propagate through the span.
    span = MagicMock()
    span.__enter__ = Mock(return_value=span)
    span.__exit__ = Mock(return_value=False)
    mock_tracer.span = Mock(return_value=span)
    mock_tracer.count = Mock()
    mock_tracer.error = Mock()

    driver = LlmWhispererDriver(LlmWhispererConfig())
    with pytest.raises(RateLimitException) as raised:
        driver.parse(b'%PDF-1.4 test content')

    error = raised.value
    assert 'Rate limit' in str(error)
    assert error.service == 'llmwhisperer'
    assert error.details['status_code'] == 429


@patch('unstract.llmwhisperer.LLMWhispererClientV2')
@patch('parxy_core.drivers.abstract_driver.tracer')
def test_llmwhisperer_driver_handles_validation_error_422(
    self, mock_tracer, mock_client_class
):
    """A client exception with status 422 surfaces as InputValidationException."""
    from unstract.llmwhisperer.client_v2 import LLMWhispererClientException

    # The client fails every whisper call with a 422 payload.
    payload = {
        'message': 'Invalid document format or constraints not met.',
        'status_code': 422,
        'extraction': {},
    }
    client = MagicMock()
    client.whisper.side_effect = LLMWhispererClientException(payload)
    mock_client_class.return_value = client

    # Tracing collaborators: exceptions propagate through the span.
    span = MagicMock()
    span.__enter__ = Mock(return_value=span)
    span.__exit__ = Mock(return_value=False)
    mock_tracer.span = Mock(return_value=span)
    mock_tracer.count = Mock()
    mock_tracer.error = Mock()

    driver = LlmWhispererDriver(LlmWhispererConfig())
    with pytest.raises(InputValidationException) as raised:
        driver.parse(b'%PDF-1.4 test content')

    error = raised.value
    assert 'Invalid document' in str(error)
    assert error.service == 'llmwhisperer'
    assert error.details['status_code'] == 422