diff --git a/clusterman/batch/cluster_metrics_collector.py b/clusterman/batch/cluster_metrics_collector.py
index df58b0199..8ab436598 100644
--- a/clusterman/batch/cluster_metrics_collector.py
+++ b/clusterman/batch/cluster_metrics_collector.py
@@ -27,6 +27,7 @@
 
 import colorlog
 import staticconf
+from botocore.exceptions import ClientError
 from clusterman_metrics import ClustermanMetricsBotoClient
 from clusterman_metrics import generate_key_with_dimensions
 from clusterman_metrics import METADATA
@@ -130,6 +131,9 @@ def load_pool_managers(self) -> None:
                 try:
                     logger.info(f"Loading resource groups for {pool}.{scheduler} on {self.options.cluster}")
                     self.pool_managers[f"{pool}.{scheduler}"] = PoolManager(self.options.cluster, pool, scheduler)
+                except ClientError as error:
+                    logger.exception(error)
+                    raise
                 except Exception as e:
                     logger.exception(e)
                     continue
diff --git a/tests/batch/cluster_metrics_collector_test.py b/tests/batch/cluster_metrics_collector_test.py
index 1f9431cae..3e070be20 100644
--- a/tests/batch/cluster_metrics_collector_test.py
+++ b/tests/batch/cluster_metrics_collector_test.py
@@ -16,6 +16,7 @@
 
 import mock
 import pytest
+from botocore.exceptions import ClientError
 from clusterman_metrics import ClustermanMetricsBotoClient
 
 from clusterman.autoscaler.pool_manager import PoolManager
@@ -166,3 +167,16 @@ def mock_write_metrics(writer, generator, pools, schedulers):
         mock.call(2),
         mock.call(2),
     ]
+
+
+def test_load_pool_managers():
+    ClusterMetricsCollector = mock.Mock()
+    batch = ClusterMetricsCollector()
+    batch.load_pool_managers.side_effect = ClientError(
+        {}, "when calling the DescribeTags operation (reached max retries: 4)"
+    )
+    batch.run_interval = 10
+    batch.metrics_client = mock.MagicMock(spec_set=ClustermanMetricsBotoClient)
+    batch.pools = {"mesos": ["pool-1", "pool-2"]}
+    with pytest.raises(ClientError):
+        batch.load_pool_managers()