diff --git a/.github/workflows/MonitoringTools.yaml b/.github/workflows/MonitoringTools.yaml deleted file mode 100644 index 195a2c75..00000000 --- a/.github/workflows/MonitoringTools.yaml +++ /dev/null @@ -1,48 +0,0 @@ -name: Pylint-Tests-Codecov - -on: - push: - branches: - - master - paths: - - "MonitoringTools/**" - - ".github/workflows/MonitoringTools.yaml" - pull_request: - paths: - - "MonitoringTools/**" - - ".github/workflows/MonitoringTools.yaml" - -jobs: - Pylint-Tests-Codecov: - runs-on: ubuntu-22.04 - strategy: - matrix: - python-version: ["3.x"] - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: "pip" - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - cd MonitoringTools - pip install -r requirements.txt - - - name: Analyse with pylint - run: cd MonitoringTools && pylint $(git ls-files '*.py') - - - name: Run tests and collect coverage - run: cd MonitoringTools && python3 -m pytest - - - name: Run tests and collect coverage - run: cd MonitoringTools && python3 -m pytest . --cov-report xml:coverage.xml --cov - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v5 - with: - token: ${{secrets.CODECOV_TOKEN}} - files: ./MonitoringTools/coverage.xml diff --git a/MonitoringTools/.coveragerc b/MonitoringTools/.coveragerc deleted file mode 100644 index d605a041..00000000 --- a/MonitoringTools/.coveragerc +++ /dev/null @@ -1,2 +0,0 @@ -[run] -omit = /usr/lib/* \ No newline at end of file diff --git a/MonitoringTools/.pylintrc b/MonitoringTools/.pylintrc deleted file mode 100644 index 53edc306..00000000 --- a/MonitoringTools/.pylintrc +++ /dev/null @@ -1,11 +0,0 @@ -[FORMAT] -# Black will enforce 88 chars on Python code -# this will enforce 120 chars on docs / comments -max-line-length=118 - -# Disable various warnings: -# C0114: Missing module string - we don't need module strings for the small repo -# W1401: Influxdb required backslashes -# R0801: Duplicate code due to test case - -disable=C0114, W1401, R0801 diff --git a/MonitoringTools/pytest.ini b/MonitoringTools/pytest.ini deleted file mode 100644 index e198a8fb..00000000 --- a/MonitoringTools/pytest.ini +++ /dev/null @@ -1,6 +0,0 @@ -[pytest] -pythonpath = ./usr/local/bin -testpaths = tests -python_files = *.py -python_functions = test_* -addopts = --ignore=setup.py \ No newline at end of file diff --git a/MonitoringTools/requirements.txt b/MonitoringTools/requirements.txt deleted file mode 100644 index b653285a..00000000 --- a/MonitoringTools/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -openstacksdk -https://github.com/stfc/openstack-query-library/releases/download/v0.1.6/openstackquery-0.1.6-py3-none-any.whl -pytest -pylint -pytest-cov \ No newline at end of file diff --git a/MonitoringTools/tests/__init__.py b/MonitoringTools/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/MonitoringTools/tests/test_collect_vm_stats.py b/MonitoringTools/tests/test_collect_vm_stats.py deleted file mode 100644 index 160c0f96..00000000 --- a/MonitoringTools/tests/test_collect_vm_stats.py +++ /dev/null @@ -1,137 +0,0 @@ -from unittest.mock import NonCallableMock, Mock, patch -from collect_vm_stats import ( - number_servers_active, - number_servers_build, - number_servers_error, - number_servers_shutoff, - number_servers_total, - get_all_server_statuses, - server_obj_to_len, - main, -) - - -def test_server_obj_to_len(): - """ - Tests that the length of a generator object is returned - """ - mock_generator_obj = iter([NonCallableMock(), NonCallableMock(), NonCallableMock()]) - res = server_obj_to_len(mock_generator_obj) - assert res == 3 - - -def test_number_servers_total(): - """ - Tests that the total number of servers can be queried and counted - """ - mock_conn = Mock() - mock_server_results = iter( - [NonCallableMock(), NonCallableMock(), NonCallableMock()] - ) - mock_conn.compute.servers.return_value = mock_server_results - num_returned = number_servers_total(mock_conn) - assert num_returned == 3 - - -def test_number_servers_active(): - """ - Tests that the active servers can be queried and counted - """ - mock_conn = Mock() - mock_server_results = iter( - [NonCallableMock(), NonCallableMock(), NonCallableMock()] - ) - mock_conn.compute.servers.return_value = mock_server_results - num_returned = number_servers_active(mock_conn) - assert num_returned == 3 - - -def test_number_servers_build(): - """ - Tests that the servers in build state can be queried and counted - """ - mock_conn = Mock() - mock_server_results = iter( - [NonCallableMock(), NonCallableMock(), NonCallableMock()] - ) - mock_conn.compute.servers.return_value = mock_server_results - num_returned = number_servers_build(mock_conn) - assert num_returned == 3 - - -def test_number_servers_error(): - """ - Tests that the error servers can be queried and counted - """ - mock_conn = Mock() - mock_server_results = iter( - [NonCallableMock(), NonCallableMock(), NonCallableMock()] - ) - mock_conn.compute.servers.return_value = mock_server_results - num_returned = number_servers_error(mock_conn) - assert num_returned == 3 - - -def test_number_servers_shutoff(): - """ - Tests that the shutoff servers can be queried and counted - """ - mock_conn = Mock() - mock_server_results = iter( - [NonCallableMock(), NonCallableMock(), NonCallableMock()] - ) - mock_conn.compute.servers.return_value = mock_server_results - num_returned = number_servers_shutoff(mock_conn) - assert num_returned == 3 - - -@patch("collect_vm_stats.connect") -def test_get_all_server_statuses(mock_connect): - """ - Tests that get_all_server_statuses calls appropriate functions and returns - data string to send to influx - """ - - def _mock_server_call(num_to_return): - """stubs out server call - :param num_to_return: number of mock objects to return - """ - return iter(NonCallableMock() for _ in range(num_to_return)) - - mock_connect.return_value.compute.servers.side_effect = [ - # total number found - _mock_server_call(10), - # active number found - _mock_server_call(4), - # build number found - _mock_server_call(3), - # error number found - _mock_server_call(2), - # shutoff number found - _mock_server_call(1), - ] - - mock_cloud_name = "prod" - res = get_all_server_statuses(mock_cloud_name) - - assert res == ( - "VMStats,instance=Prod " - "totalVM=10i,activeVM=4i," - "buildVM=3i,errorVM=2i,shutoffVM=1i" - ) - - -@patch("collect_vm_stats.run_scrape") -@patch("collect_vm_stats.parse_args") -def test_main(mock_parse_args, mock_run_scrape): - """ - tests main function calls run_scrape utility function properly - """ - mock_user_args = NonCallableMock() - main(mock_user_args) - mock_run_scrape.assert_called_once_with( - mock_parse_args.return_value, get_all_server_statuses - ) - mock_parse_args.assert_called_once_with( - mock_user_args, description="Get All VM Statuses" - ) diff --git a/MonitoringTools/tests/test_limits_to_influx.py b/MonitoringTools/tests/test_limits_to_influx.py deleted file mode 100644 index c09b1ba6..00000000 --- a/MonitoringTools/tests/test_limits_to_influx.py +++ /dev/null @@ -1,203 +0,0 @@ -from unittest.mock import patch, call, NonCallableMock -from limits_to_influx import ( - convert_to_data_string, - get_limit_prop_string, - extract_limits, - get_limits_for_project, - get_all_limits, - main, -) -import pytest - - -def test_convert_to_data_string_no_items(): - """ - Tests convert_to_data_string returns empty string when given empty dict as limit_details - """ - assert convert_to_data_string(NonCallableMock(), {}) == "" - - -@patch("limits_to_influx.get_limit_prop_string") -def test_convert_to_data_string_one_item(mock_get_limit_prop_string): - """ - Tests convert_to_data_string works with single entry in dict for limit_details - """ - mock_instance = "prod" - mock_project_details = NonCallableMock() - mock_limit_details = {"project foo": mock_project_details} - mock_get_limit_prop_string.return_value = "prop1=val1" - - res = convert_to_data_string(mock_instance, mock_limit_details) - assert res == 'Limits,Project="project\ foo",instance=Prod prop1=val1\n' - mock_get_limit_prop_string.assert_called_once_with(mock_project_details) - - -@patch("limits_to_influx.get_limit_prop_string") -def test_convert_to_data_string_multi_item(mock_get_limit_prop_string): - """ - Tests convert_to_data_string works with multiple entries in dict for limit_details - """ - mock_instance = "prod" - mock_project_details = NonCallableMock() - - mock_limit_details = { - "project foo": mock_project_details, - "project bar": mock_project_details, - } - mock_get_limit_prop_string.side_effect = ["prop1=val1", "prop1=val2"] - assert ( - convert_to_data_string(mock_instance, mock_limit_details) - == 'Limits,Project="project\ foo",instance=Prod prop1=val1\n' - 'Limits,Project="project\ bar",instance=Prod prop1=val2\n' - ) - - -@pytest.mark.parametrize( - "details, expected", - [ - ({}, ""), - ({"key1": "123"}, "key1=123i"), - ( - {"key1": "123", "key2": "456", "key3": "789"}, - "key1=123i,key2=456i,key3=789i", - ), - ], -) -def test_limit_prop_string(details, expected): - """ - tests get_limit_prop_string converts dict into data string properly - """ - assert get_limit_prop_string(details) == expected - - -def test_extract_limits_invalid(): - """ - tests extract_limits when given limits dict that is invalid - """ - with pytest.raises(RuntimeError): - extract_limits({}) - - -def test_extract_limits_valid(): - """ - test extract_limits function extracts proper limits and outputs in correct format - """ - mock_project_limits_dict = { - "server_meta": NonCallableMock(), - "personality": NonCallableMock(), - "server_groups_used": NonCallableMock(), - "image_meta": NonCallableMock(), - "personality_size": NonCallableMock(), - "keypairs": NonCallableMock(), - "security_group_rules": NonCallableMock(), - "server_groups": NonCallableMock(), - "total_cores_used": NonCallableMock(), - "total_ram_used": NonCallableMock(), - "instances_used": NonCallableMock(), - "security_groups": NonCallableMock(), - "floating_ips_used": NonCallableMock(), - "total_cores": NonCallableMock(), - "server_group_members": NonCallableMock(), - "floating_ips": NonCallableMock(), - "security_groups_used": NonCallableMock(), - "instances": NonCallableMock(), - "total_ram": NonCallableMock(), - } - assert extract_limits(mock_project_limits_dict) == { - "maxServerMeta": mock_project_limits_dict["server_meta"], - "maxPersonality": mock_project_limits_dict["personality"], - "totalServerGroupsUsed": mock_project_limits_dict["server_groups_used"], - "maxImageMeta": mock_project_limits_dict["image_meta"], - "maxPersonalitySize": mock_project_limits_dict["personality_size"], - "maxTotalKeypairs": mock_project_limits_dict["keypairs"], - "maxSecurityGroupRules": mock_project_limits_dict["security_group_rules"], - "maxServerGroups": mock_project_limits_dict["server_groups"], - "totalCoresUsed": mock_project_limits_dict["total_cores_used"], - "totalRAMUsed": mock_project_limits_dict["total_ram_used"], - "totalInstancesUsed": mock_project_limits_dict["instances_used"], - "maxSecurityGroups": mock_project_limits_dict["security_groups"], - "totalFloatingIpsUsed": mock_project_limits_dict["floating_ips_used"], - "maxTotalCores": mock_project_limits_dict["total_cores"], - "maxServerGroupMembers": mock_project_limits_dict["server_group_members"], - "maxTotalFloatingIps": mock_project_limits_dict["floating_ips"], - "totalSecurityGroupsUsed": mock_project_limits_dict["security_groups_used"], - "maxTotalInstances": mock_project_limits_dict["instances"], - "maxTotalRAMSize": mock_project_limits_dict["total_ram"], - } - - -@patch("limits_to_influx.extract_limits") -@patch("limits_to_influx.openstack") -def test_get_limits_for_project(mock_openstack, mock_extract_limits): - """ - tests get_limits_for_project gets the limits for a project by calling appropriate functions - """ - mock_instance = NonCallableMock() - mock_project_id = NonCallableMock() - - mock_conn = mock_openstack.connect.return_value - mock_conn.get_volume_limits.return_value = {"absolute": {"lim1": "val1"}} - mock_extract_limits.return_value = {"lim2": "val2"} - - res = get_limits_for_project(mock_instance, mock_project_id) - mock_openstack.connect.assert_called_once_with(mock_instance) - mock_conn.get_compute_limits.assert_called_once_with(mock_project_id) - mock_conn.get_volume_limits.assert_called_once_with(mock_project_id) - mock_extract_limits.assert_called_once_with( - mock_conn.get_compute_limits.return_value - ) - assert res == {"lim1": "val1", "lim2": "val2"} - - -@patch("limits_to_influx.openstack") -@patch("limits_to_influx.get_limits_for_project") -@patch("limits_to_influx.convert_to_data_string") -def test_get_all_limits( - mock_convert_to_data_string, mock_get_limits_for_project, mock_openstack -): - """ - tests get_all_limits function gets the limits of project appropriately - """ - mock_project_list = [ - # to be ignored - {"name": "xyz_rally", "id": "foo"}, - {"name": "844_xyz", "id": "bar"}, - # not to be ignored - {"name": "proj1", "id": "proj1-id"}, - {"name": "proj2", "id": "proj2-id"}, - ] - mock_conn_obj = mock_openstack.connect.return_value - mock_conn_obj.list_projects.return_value = mock_project_list - - mock_instance = NonCallableMock() - res = get_all_limits(mock_instance) - mock_openstack.connect.assert_called_once_with(cloud=mock_instance) - mock_conn_obj.list_projects.assert_called_once() - mock_get_limits_for_project.assert_has_calls( - [call(mock_instance, "proj1-id"), call(mock_instance, "proj2-id")] - ) - - mock_convert_to_data_string.assert_called_once_with( - mock_instance, - { - "proj1": mock_get_limits_for_project.return_value, - "proj2": mock_get_limits_for_project.return_value, - }, - ) - assert res == mock_convert_to_data_string.return_value - - -@patch("limits_to_influx.run_scrape") -@patch("limits_to_influx.parse_args") -def test_main(mock_parse_args, mock_run_scrape): - """ - tests main function calls run_scrape utility function properly - """ - mock_user_args = NonCallableMock() - main(mock_user_args) - mock_run_scrape.assert_called_once_with( - mock_parse_args.return_value, get_all_limits - ) - mock_parse_args.assert_called_once_with( - mock_user_args, description="Get All Project Limits" - ) diff --git a/MonitoringTools/tests/test_send_metric_utils.py b/MonitoringTools/tests/test_send_metric_utils.py deleted file mode 100644 index 89805449..00000000 --- a/MonitoringTools/tests/test_send_metric_utils.py +++ /dev/null @@ -1,143 +0,0 @@ -import configparser -from pathlib import Path -from unittest.mock import patch, call, NonCallableMock, MagicMock - -import pytest - -from send_metric_utils import read_config_file, post_to_influxdb, parse_args, run_scrape - - -@patch("send_metric_utils.ConfigParser") -def test_read_config_file_valid(mock_config_parser): - """ - tests read_config_file function when given a valid config file - """ - mock_config_obj = mock_config_parser.return_value - mock_config_obj.sections.return_value = ["auth", "cloud", "db"] - mock_config_obj.items.side_effect = [ - [("password", "pass"), ("username", "user")], - [("instance", "prod")], - [("database", "cloud"), ("host", "localhost:8086")], - ] - mock_filepath = NonCallableMock() - res = read_config_file(mock_filepath) - mock_config_parser.assert_called_once() - mock_config_obj.sections.assert_called_once() - mock_config_obj.items.assert_has_calls([call("auth"), call("cloud"), call("db")]) - - assert res == { - "auth.password": "pass", - "auth.username": "user", - "cloud.instance": "prod", - "db.database": "cloud", - "db.host": "localhost:8086", - } - - -@patch("send_metric_utils.ConfigParser") -def test_read_config_file_empty(mock_config_parser): - """ - tests read_config_file function when given a emtpy config file - """ - mock_config_parser.return_value.sections.return_value = [] - with pytest.raises(AssertionError): - read_config_file(NonCallableMock()) - - -@patch("send_metric_utils.requests") -def test_post_to_influxdb_valid(mock_requests): - """ - tests post_to_influxdb function uses requests.post to post data correctly - """ - mock_data_string = NonCallableMock() - mock_host = "localhost:8086" - mock_db_name = "cloud" - mock_pass = NonCallableMock() - mock_user = NonCallableMock() - - post_to_influxdb(mock_data_string, mock_host, mock_db_name, (mock_user, mock_pass)) - mock_requests.post.assert_called_once_with( - "http://localhost:8086/write?db=cloud&precision=s", - data=mock_data_string, - auth=(mock_user, mock_pass), - timeout=60, - ) - mock_response = mock_requests.post.return_value - mock_response.raise_for_status.assert_called_once() - - -@patch("send_metric_utils.requests") -def test_post_to_influxdb_empty_string(mock_requests): - """ - tests post_to_influxdb function when datastring is empty, should do nothing - """ - post_to_influxdb( - "", NonCallableMock(), NonCallableMock(), (NonCallableMock(), NonCallableMock()) - ) - mock_requests.post.assert_not_called() - - -@patch("send_metric_utils.read_config_file") -def test_parse_args_valid_args(mock_read_config_file): - """ - tests parse_args function with a valid filepath - """ - res = parse_args(["./usr/local/bin/influxdb.conf"]) - assert res == mock_read_config_file.return_value - - -def test_parse_args_filepath_does_not_exist(): - """ - tests parse_args function with invalid filepath (doesn't exist) - """ - with pytest.raises(RuntimeError): - parse_args(["./invalid-filepath"]) - - -def test_parse_args_filepath_invalid_dir_fp(): - """ - tests parse_args function with invalid filepath (points to directory) - """ - with pytest.raises(RuntimeError): - parse_args(["."]) - - -@patch("send_metric_utils.read_config_file") -def test_parse_args_filepath_read_config_fails(mock_read_config_file): - """ - tests parse_args function fails when read_config_file returns config error - """ - mock_read_config_file.side_effect = configparser.Error - with pytest.raises(RuntimeError): - parse_args(["./usr/local/bin/influxdb.conf"]) - - mock_read_config_file.assert_called_once_with(Path("./usr/local/bin/influxdb.conf")) - - -@patch("send_metric_utils.post_to_influxdb") -def test_run_scrape(mock_post_to_influxdb): - """ - Tests run_scrape function. - """ - mock_pass = NonCallableMock() - mock_user = NonCallableMock() - mock_host = NonCallableMock() - mock_db = NonCallableMock() - mock_instance = NonCallableMock() - - mock_influxdb_args = { - "auth.password": mock_pass, - "auth.username": mock_user, - "cloud.instance": mock_instance, - "db.database": mock_db, - "db.host": mock_host, - } - mock_scrape_func = MagicMock() - - run_scrape(mock_influxdb_args, mock_scrape_func) - mock_post_to_influxdb.assert_called_once_with( - mock_scrape_func.return_value, - host=mock_host, - db_name=mock_db, - auth=(mock_user, mock_pass), - ) diff --git a/MonitoringTools/tests/test_service_status_to_influx.py b/MonitoringTools/tests/test_service_status_to_influx.py deleted file mode 100644 index 925af497..00000000 --- a/MonitoringTools/tests/test_service_status_to_influx.py +++ /dev/null @@ -1,489 +0,0 @@ -from unittest.mock import patch, call, NonCallableMock, MagicMock - -from service_status_to_influx import ( - get_hypervisor_properties, - get_service_properties, - get_agent_properties, - convert_to_data_string, - get_service_prop_string, - get_all_hv_details, - update_with_service_statuses, - update_with_agent_statuses, - get_all_service_statuses, - main, -) - - -def test_get_hypervisor_properties_state_up(): - """ - tests that get_hypervisor_properties parses a valid hypervisor entry properly and extracts - useful information and returns the result in correct format - when hv state is up - """ - mock_hv = { - "state": "up", - "memory_size": 2, - "memory_used": 1, - "vcpus_used": 4, - "vcpus": 5, - } - expected_result = { - "hv": { - "aggregate": "no-aggregate", - "memorymax": 2, - "memoryused": 1, - "memoryavailable": 1, - "memperc": 50, - "cpuused": 4, - "cpumax": 5, - "cpuavailable": 1, - "cpuperc": 80, - "agent": 1, - "state": 1, - "statetext": "Up", - "utilperc": 80, - "cpufull": 0, - "memfull": 1, - "full": 1, - } - } - assert get_hypervisor_properties(mock_hv) == expected_result - - -def test_get_hypervisor_properties_state_down(): - """ - tests that get_hypervisor_properties parses a valid hypervisor entry properly and extracts - useful information and returns the result in correct format - when hv state is down - :return: - """ - mock_hv = { - "state": "down", - "memory_size": 2, - "memory_used": 1, - "vcpus_used": 4, - "vcpus": 5, - } - expected_result = { - "hv": { - "aggregate": "no-aggregate", - "memorymax": 2, - "memoryused": 1, - "memoryavailable": 1, - "memperc": 50, - "cpuused": 4, - "cpumax": 5, - "cpuavailable": 1, - "cpuperc": 80, - "agent": 1, - "state": 0, - "statetext": "Down", - "utilperc": 80, - "cpufull": 0, - "memfull": 1, - "full": 1, - } - } - assert get_hypervisor_properties(mock_hv) == expected_result - - -def test_get_service_properties_enabled_up(): - """ - tests that get_service_properties parses a valid service entry properly and extracts - useful information and returns the result in correct format - when status=enabled, state=up - """ - mock_service = {"binary": "foo", "status": "enabled", "state": "up"} - expected_result = { - "foo": { - "status": 1, - "statustext": "Enabled", - "state": 1, - "statetext": "Up", - "agent": 1, - } - } - assert get_service_properties(mock_service) == expected_result - - -def test_get_service_properties_disabled_down(): - """ - tests that get_service_properties parses a valid service entry properly and extracts - useful information and returns the result in correct format - when status=disabled, state=down - """ - mock_service = {"binary": "bar", "status": "disabled", "state": "down"} - expected_result = { - "bar": { - "status": 0, - "statustext": "Disabled", - "state": 0, - "statetext": "Down", - "agent": 1, - } - } - assert get_service_properties(mock_service) == expected_result - - -def test_get_agent_properties_alive_admin_up(): - """ - tests that get_agent_properties parses a valid network agent entry properly and extracts - useful information and returns the result in correct format - - when is_alive=True, is_admin_state_up=True - """ - mock_agent = { - "binary": "foo", - "is_alive": True, - "is_admin_state_up": True, - } - expected_result = { - "foo": { - "state": 1, - "statetext": "Up", - "status": 1, - "statustext": "Enabled", - "agent": 1, - } - } - assert get_agent_properties(mock_agent) == expected_result - - -def test_get_agent_properties_disabled_down(): - """ - tests that get_agent_properties parses a valid network agent entry properly and extracts - useful information and returns the result in correct format - - when is_alive=False, is_admin_state_up=False - """ - mock_agent = { - "binary": "bar", - "is_alive": False, - "is_admin_state_up": False, - } - expected_result = { - "bar": { - "state": 0, - "statetext": "Down", - "status": 0, - "statustext": "Disabled", - "agent": 1, - } - } - assert get_agent_properties(mock_agent) == expected_result - - -def test_convert_to_data_string_no_items(): - """ - Tests convert_to_data_string returns empty string when given no details - """ - assert convert_to_data_string(NonCallableMock(), {}) == "" - - -@patch("service_status_to_influx.get_service_prop_string") -def test_convert_to_data_string_one_hv_one_service(mock_get_service_prop_string): - """ - Tests convert_to_data_string works with single entry in details - """ - mock_instance = "prod" - mock_service_details = { - "aggregate": "ag1", - "statetext": "Up", - "statustext": "Enabled", - "prop1": "val1", - } - mock_details = {"hv1": {"service1": mock_service_details}} - - mock_get_service_prop_string.return_value = "prop1=val1" - - res = convert_to_data_string(mock_instance, mock_details) - assert ( - res == - 'ServiceStatus,host="hv1",service="service1",instance=Prod,' - 'statetext="Up",statustext="Enabled",aggregate="ag1"' - ' prop1=val1\n' - ) - mock_get_service_prop_string.assert_called_once_with({"prop1": "val1"}) - - -@patch("service_status_to_influx.get_service_prop_string") -def test_convert_to_data_string_one_hv_multi_service(mock_get_service_prop_string): - """ - Tests convert_to_data_string works with single entry in details with multiple service binaries - """ - mock_instance = "prod" - mock_service_details_1 = { - "aggregate": "ag1", - "statetext": "Up", - "statustext": "Enabled", - "prop1": "val1", - } - mock_service_details_2 = { - "aggregate": "ag2", - "statetext": "Down", - "statustext": "Disabled", - "prop1": "val2", - } - mock_details = { - "hv1": {"service1": mock_service_details_1, "service2": mock_service_details_2} - } - - mock_get_service_prop_string.side_effect = ["prop1=val1", "prop1=val2"] - - res = convert_to_data_string(mock_instance, mock_details) - assert res == ( - 'ServiceStatus,host="hv1",service="service1",instance=Prod,' - 'statetext="Up",statustext="Enabled",aggregate="ag1" ' - 'prop1=val1\n' - 'ServiceStatus,host="hv1",service="service2",instance=Prod,' - 'statetext="Down",statustext="Disabled",aggregate="ag2" ' - 'prop1=val2\n' - ) - mock_get_service_prop_string.assert_has_calls( - [call({"prop1": "val1"}), call({"prop1": "val2"})] - ) - - -@patch("service_status_to_influx.get_service_prop_string") -def test_convert_to_data_string_multi_item(mock_get_service_prop_string): - """ - Tests convert_to_data_string works with multiple entries in dict for details - """ - mock_instance = "prod" - mock_service_details_1 = { - "aggregate": "ag1", - "statetext": "Up", - "statustext": "Enabled", - "prop1": "val1", - } - mock_service_details_2 = { - "aggregate": "ag2", - "statetext": "Down", - "statustext": "Disabled", - "prop1": "val2", - } - mock_service_details_3 = { - "aggregate": "ag3", - "statetext": "Up", - "statustext": "Disabled", - "prop1": "val3", - } - - mock_details = { - "hv1": { - "service1": mock_service_details_1, - "service2": mock_service_details_2, - }, - "hv2": {"service3": mock_service_details_3}, - } - - mock_get_service_prop_string.side_effect = [ - "prop1=val1", - "prop1=val2", - "prop1=val3", - ] - - res = convert_to_data_string(mock_instance, mock_details) - assert res == ( - 'ServiceStatus,host="hv1",service="service1",instance=Prod,' - 'statetext="Up",statustext="Enabled",aggregate="ag1" ' - 'prop1=val1\n' - 'ServiceStatus,host="hv1",service="service2",instance=Prod,' - 'statetext="Down",statustext="Disabled",aggregate="ag2" ' - 'prop1=val2\n' - 'ServiceStatus,host="hv2",service="service3",instance=Prod,' - 'statetext="Up",statustext="Disabled",aggregate="ag3" ' - 'prop1=val3\n' - ) - mock_get_service_prop_string.assert_has_calls( - [ - call({"prop1": "val1"}), - call({"prop1": "val2"}), - call({"prop1": "val3"}), - ] - ) - - -def test_get_service_prop_string_empty_dict(): - """ - tests get_service_prop_string returns nothing when given empty service_dict - """ - assert get_service_prop_string({}) == "" - - -def test_get_service_prop_string(): - """ - tests get_service_prop_string returns correct prop string - it should suffix each property value with i - """ - props = {"prop1": 1, "prop2": 2, "prop3": 3} - expected_result = "prop1=1i,prop2=2i,prop3=3i" - assert get_service_prop_string(props) == expected_result - - -@patch("service_status_to_influx.get_hypervisor_properties") -def test_get_all_hv_details(mock_get_hypervisor_properties): - """ - tests get_all_hv_details returns dict of hypervisor status information - - for each hypervisor, call get_hypervisor_properties and store in a dict, - - then for each aggregate update the aggregate property for each hv with the aggregate name - that the hv belongs to - """ - mock_conn = MagicMock() - mock_hvs = [{"name": "hv1"}, {"name": "hv2"}, {"name": "hv3"}] - - mock_aggregates = [ - {"name": "ag1", "hosts": ["hv1", "hv2"]}, - {"name": "ag2", "hosts": ["hv3", "hv4"]}, - {"name": "ag3", "hosts": ["hv5"]}, - ] - - # stubs out getting props - mock_get_hypervisor_properties.side_effect = [{"hv": {}}, {"hv": {}}, {"hv": {}}] - mock_conn.list_hypervisors.return_value = mock_hvs - mock_conn.compute.aggregates.return_value = mock_aggregates - res = get_all_hv_details(mock_conn) - - mock_conn.list_hypervisors.assert_called_once() - mock_conn.compute.aggregates.assert_called_once() - - mock_get_hypervisor_properties.assert_has_calls([call(hv) for hv in mock_hvs]) - - assert res == { - "hv1": {"hv": {"aggregate": "ag1"}}, - "hv2": {"hv": {"aggregate": "ag1"}}, - "hv3": {"hv": {"aggregate": "ag2"}}, - } - - -@patch("service_status_to_influx.get_service_properties") -def test_update_with_service_statuses(mock_get_service_properties): - """ - tests update_with_service_statuses, for each service found, get its properties - and update provided dictionary status_details dict with service info - """ - mock_conn = MagicMock() - mock_status_details = { - "hv1": {"hv": {}, "foo": {}, "bar": {}}, - "hv2": {"hv": {}}, - } - - mock_services = [ - {"host": "hv1", "binary": "nova-compute"}, - {"host": "hv1", "binary": "other-svc"}, - {"host": "hv2", "binary": "other-svc"}, - {"host": "hv3", "binary": "nova-compute"}, - ] - mock_conn.compute.services.return_value = mock_services - - # stubs out actually getting properties - mock_get_service_properties.side_effect = [ - {"nova-compute": {"status": 1, "statustext": "enabled"}}, - {"other-service": {}}, - {"other-service": {"status": 1, "statustext": "enabled"}}, - {"nova-compute": {"status": 0, "statustext": "disabled"}}, - ] - - res = update_with_service_statuses(mock_conn, mock_status_details) - - mock_conn.compute.services.assert_called_once() - mock_get_service_properties.assert_has_calls([call(svc) for svc in mock_services]) - assert res == { - # shouldn't override what's already there - # add hv status == nova-compute svc status - "hv1": { - "hv": {"status": 1, "statustext": "enabled"}, - "nova-compute": {"status": 1, "statustext": "enabled"}, - "foo": {}, - "bar": {}, - "other-service": {}, - }, - # only nova-compute status adds hv status - "hv2": {"hv": {}, "other-service": {"status": 1, "statustext": "enabled"}}, - # adds what doesn't exist, no "hv" so no setting status - "hv3": {"nova-compute": {"status": 0, "statustext": "disabled"}}, - } - - -@patch("service_status_to_influx.get_agent_properties") -def test_update_with_agent_statuses(mock_get_agent_properties): - """ - tests update_with_agent_statuses, for each network agent found, get its properties - and update provided dictionary status_details dict with agent info - """ - mock_conn = MagicMock() - mock_status_details = {"hv1": {"foo": {}}, "hv2": {}} - - mock_agents = [ - {"host": "hv1", "binary": "ag1"}, - {"host": "hv1", "binary": "ag2"}, - {"host": "hv2", "binary": "ag1"}, - {"host": "hv3", "binary": "ag3"}, - ] - mock_conn.network.agents.return_value = mock_agents - - # stubs out actually getting properties - mock_get_agent_properties.side_effect = [ - {"ag1": {}}, - {"ag2": {}}, - {"ag1": {}}, - {"ag3": {}}, - ] - - res = update_with_agent_statuses(mock_conn, mock_status_details) - - mock_conn.network.agents.assert_called_once() - mock_get_agent_properties.assert_has_calls([call(agent) for agent in mock_agents]) - assert res == { - # shouldn't override what's already there - "hv1": {"foo": {}, "ag1": {}, "ag2": {}}, - "hv2": {"ag1": {}}, - # adds what doesn't exist - "hv3": {"ag3": {}}, - } - - -@patch("service_status_to_influx.openstack") -@patch("service_status_to_influx.get_all_hv_details") -@patch("service_status_to_influx.update_with_service_statuses") -@patch("service_status_to_influx.update_with_agent_statuses") -@patch("service_status_to_influx.convert_to_data_string") -def test_get_all_service_statuses( - mock_convert, - mock_get_agent_statuses, - mock_get_service_statuses, - mock_get_hv_statuses, - mock_openstack, -): - """ - Tests get_all_service_statuses calls appropriate functions: - - get hv status info - - update with service status info - - update with agent status info - - calls convert_to_data_string on result and output - """ - mock_instance = NonCallableMock() - mock_conn = mock_openstack.connect.return_value - res = get_all_service_statuses(mock_instance) - mock_openstack.connect.assert_called_once_with(mock_instance) - mock_get_hv_statuses.assert_called_once_with(mock_conn) - mock_get_service_statuses.assert_called_once_with( - mock_conn, mock_get_hv_statuses.return_value - ) - mock_get_agent_statuses.assert_called_once_with( - mock_conn, mock_get_service_statuses.return_value - ) - mock_convert.assert_called_once_with( - mock_instance, mock_get_agent_statuses.return_value - ) - assert res == mock_convert.return_value - - -@patch("service_status_to_influx.run_scrape") -@patch("service_status_to_influx.parse_args") -def test_main(mock_parse_args, mock_run_scrape): - """ - tests main function calls run_scrape utility function properly - """ - mock_user_args = NonCallableMock() - main(mock_user_args) - mock_run_scrape.assert_called_once_with( - mock_parse_args.return_value, get_all_service_statuses - ) - mock_parse_args.assert_called_once_with( - mock_user_args, description="Get All Service Statuses" - ) diff --git a/MonitoringTools/tests/test_slottifier.py b/MonitoringTools/tests/test_slottifier.py deleted file mode 100644 index 60549012..00000000 --- a/MonitoringTools/tests/test_slottifier.py +++ /dev/null @@ -1,745 +0,0 @@ -from unittest.mock import NonCallableMock, MagicMock, patch, call -from slottifier import ( - get_hv_info, - get_flavor_requirements, - get_valid_flavors_for_aggregate, - convert_to_data_string, - calculate_slots_on_hv, - get_openstack_resources, - get_all_hv_info_for_aggregate, - update_slots, - get_slottifier_details, - main, -) -import pytest - -from slottifier_entry import SlottifierEntry - - -@pytest.fixture(name="mock_hypervisors") -def mock_hypervisors_fixture(): - """fixture for setting up various mock hvs""" - return { - "hv1": { - "hypervisor_name": "hv1", - "hypervisor_status": "enabled", - "hypervisor_vcpus": 8, - "hypervisor_vcpus_used": 2, - "hypervisor_memory_size": 8192, - "hypervisor_memory_used": 2048, - }, - "hv2": { - "hypervisor_name": "hv2", - "hypervisor_status": "enabled", - "hypervisor_vcpus": 4, - "hypervisor_vcpus_used": 6, - "hypervisor_memory_size": 2048, - "hypervisor_memory_used": 4096, - }, - "hv3": { - "hypervisor_name": "hv3", - "hypervisor_status": "disabled", - }, - "hv4": { - "hypervisor_name": "hv4", - "hypervisor_status": "enabled", - "hypervisor_vcpus": "Not Found", - "hypervisor_vcpus_used": "Not Found", - "hypervisor_memory_size": "Not Found", - "hypervisor_memory_used": "Not Found", - } - } - - -@pytest.fixture(name="mock_compute_services") -def mock_service_fixture(): - """ - Returns a mock set of services to use as test data - """ - return { - "svc1": {"host": "hv1", "name": "svc1"}, - "svc2": {"host": "hv2", "name": "svc2"}, - "svc3": {"host": "hv4", "name": "svc3"}, - } - - -@pytest.fixture(name="mock_aggregate") -def mock_aggregate_fixture(): - """fixture for setting up a mock aggregate""" - - def _mock_aggregate(hosttype=None, gpu_num=None, storagetype=None): - """ - helper function for setting up mock aggregate - :param hosttype: optional hosttype to set - :param gpu_num: optional gpu_num to set - :param storagetype: optional storagetype to set - """ - aggregate = {"metadata": {}} - if hosttype: - aggregate["metadata"]["hosttype"] = hosttype - if gpu_num: - aggregate["metadata"]["gpunum"] = gpu_num - if storagetype: - aggregate["metadata"]["local-storage-type"] = storagetype - return aggregate - - return _mock_aggregate - - -@pytest.fixture(name="mock_flavors_list") -def mock_flavors_fixture(): - """fixture for setting up various mock flavors""" - return [ - {"id": 1, "extra_specs": {"aggregate_instance_extra_specs:hosttype": "A"}}, - {"id": 2, "extra_specs": {"aggregate_instance_extra_specs:hosttype": "B"}}, - {"id": 3, "extra_specs": {}}, - {"id": 4, "extra_specs": {"aggregate_instance_extra_specs:hosttype": "A"}}, - { - "id": 5, - "extra_specs": { - "aggregate_instance_extra_specs:hosttype": "C", - "aggregate_instance_extra_specs:local-storage-type": "1", - }, - }, - { - "id": 6, - "extra_specs": { - "aggregate_instance_extra_specs:hosttype": "C", - "aggregate_instance_extra_specs:local-storage-type": "2", - }, - }, - ] - - -def test_get_hv_info_exists_and_enabled(mock_hypervisors, mock_aggregate): - """tests get_hv_info when hv exists and enabled - should parse results properly""" - - assert get_hv_info( - mock_hypervisors["hv1"], mock_aggregate(gpu_num="1"), {"status": "enabled"} - ) == { - "vcpus_available": 6, - "mem_available": 6144, - "gpu_capacity": 1, - "vcpus_capacity": 8, - "mem_capacity": 8192, - "compute_service_status": "enabled", - } - - -def test_get_hv_info_negative_results_floored(mock_hypervisors, mock_aggregate): - """ - tests get_hv_info when results for available mem/cores are negative - - should set it to 0 instead - """ - - assert get_hv_info( - mock_hypervisors["hv2"], mock_aggregate(), {"status": "enabled"} - ) == { - "vcpus_available": 0, - "mem_available": 0, - "gpu_capacity": 0, - "vcpus_capacity": 4, - "mem_capacity": 2048, - "compute_service_status": "enabled", - } - - -def test_get_hv_info_exists_but_disabled(mock_hypervisors, mock_aggregate): - """ - tests get_hv_info when hv is disabled - should return default results - """ - assert get_hv_info( - mock_hypervisors["hv3"], mock_aggregate(), {"status": "disabled"} - ) == { - "vcpus_available": 0, - "mem_available": 0, - "gpu_capacity": 0, - "vcpus_capacity": 0, - "mem_capacity": 0, - "compute_service_status": "disabled", - } - -def test_get_hv_info_but_values_are_not_found(mock_hypervisors, mock_aggregate): - """ - tests strings that contain values of "Not_Found" - should return all "Not Found" values as 0 - """ - assert get_hv_info( - mock_hypervisors["hv4"], mock_aggregate(), {"status": "enabled"} - ) == { - "vcpus_available": 0, - "mem_available": 0, - "gpu_capacity": 0, - "vcpus_capacity": 0, - "mem_capacity": 0, - "compute_service_status": "enabled", - } - - -def test_get_flavor_requirements_with_valid_flavor(): - """ - tests get_flavor_requirements with valid flavor - """ - mock_flavor = { - "extra_specs": {"accounting:gpu_num": "2"}, - "vcpus": "4", - "ram": "8192", - } - assert get_flavor_requirements(mock_flavor) == { - "gpus_required": 2, - "cores_required": 4, - "mem_required": 8192, - } - - -def test_get_flavor_requirements_with_missing_values(): - """ - tests get_flavor_requirements with all missing values - - should return 0s for requirements - """ - with pytest.raises(RuntimeError): - get_flavor_requirements({}) - - -def test_get_flavor_requirements_with_partial_values(): - """ - tests get_flavor_requirements with missing gpu_num attr - should default it to 0 - """ - req_dict = {"ram": "8192", "vcpus": 8} - assert get_flavor_requirements(req_dict) == { - "gpus_required": 0, - "cores_required": 8, - "mem_required": 8192, - } - - -def test_get_valid_flavors_with_matching_type(mock_flavors_list, mock_aggregate): - """ - test get_valid_flavors_for_aggregate should find all flavors with matching - aggregate hosttype - """ - assert get_valid_flavors_for_aggregate(mock_flavors_list, mock_aggregate("A")) == [ - {"id": 1, "extra_specs": {"aggregate_instance_extra_specs:hosttype": "A"}}, - {"id": 4, "extra_specs": {"aggregate_instance_extra_specs:hosttype": "A"}}, - ] - - -def test_get_valid_flavors_with_empty_flavors_list(mock_aggregate): - """ - test get_valid_flavors_for_aggregate should return empty list if no flavors given - """ - assert not get_valid_flavors_for_aggregate([], mock_aggregate("A")) - - -def test_get_valid_flavors_with_non_matching_hosttype( - mock_flavors_list, mock_aggregate -): - """ - test get_valid_flavors_for_aggregate should return empty list if no flavors found with - matching aggregate hosttype - """ - assert not get_valid_flavors_for_aggregate(mock_flavors_list, mock_aggregate("D")) - - -def test_get_valid_flavors_with_storagetype(mock_flavors_list, mock_aggregate): - """ - test get_valid_flavors_for_aggregate should return list of hvs with matching hosttype and storagetype - """ - assert get_valid_flavors_for_aggregate( - mock_flavors_list, mock_aggregate(hosttype="C", storagetype="1") - ) == [ - { - "id": 5, - "extra_specs": { - "aggregate_instance_extra_specs:hosttype": "C", - "aggregate_instance_extra_specs:local-storage-type": "1", - }, - }, - ] - - -def test_convert_to_data_string_no_items(): - """ - Tests convert_to_data_string returns empty string when given empty dict as slots_dict - """ - assert not convert_to_data_string(NonCallableMock(), {}) - - -def test_convert_to_data_string_one_item(): - """ - Tests convert_to_data_string works with single entry in dict for slots_dict - """ - mock_instance = "prod" - - mock_slot_info_dataclass = MagicMock() - mock_slot_info_dataclass.slots_available = "1" - mock_slot_info_dataclass.max_gpu_slots_capacity = "2" - mock_slot_info_dataclass.estimated_gpu_slots_used = "3" - mock_slot_info_dataclass.max_gpu_slots_capacity_enabled = "4" - - mock_slots_dict = {"flavor1": mock_slot_info_dataclass} - - res = convert_to_data_string(mock_instance, mock_slots_dict) - assert res == ( - "SlotsAvailable,instance=Prod,flavor=flavor1 " - "SlotsAvailable=1i,maxSlotsAvailable=2i,usedSlots=3i,enabledSlots=4i\n" - ) - - -def test_convert_to_data_string_multi_item(): - """ - Tests convert_to_data_string works with multiple entries in dict for slots_dict - """ - mock_instance = "prod" - mock_slot_info_dataclass = MagicMock() - mock_slot_info_dataclass.slots_available = "1" - mock_slot_info_dataclass.max_gpu_slots_capacity = "2" - mock_slot_info_dataclass.estimated_gpu_slots_used = "3" - mock_slot_info_dataclass.max_gpu_slots_capacity_enabled = "4" - - mock_slots_dict = { - "flavor1": mock_slot_info_dataclass, - "flavor2": mock_slot_info_dataclass, - } - - res = convert_to_data_string(mock_instance, mock_slots_dict) - assert res == ( - "SlotsAvailable,instance=Prod,flavor=flavor1 " - "SlotsAvailable=1i,maxSlotsAvailable=2i,usedSlots=3i,enabledSlots=4i\n" - "SlotsAvailable,instance=Prod,flavor=flavor2 " - "SlotsAvailable=1i,maxSlotsAvailable=2i,usedSlots=3i,enabledSlots=4i\n" - ) - - -def test_calculate_slots_on_hv_non_gpu_disabled(): - """ - tests calculate_slots_on_hv calculates slots properly for non-gpu flavor - - should return 0s since hv is disabled - """ - res = calculate_slots_on_hv( - "flavor1", - {"cores_required": 10, "mem_required": 10}, - { - "compute_service_status": "disabled", - # can fit 10 slots, but should be 0 since compute service disabled - "vcpus_available": 100, - "mem_available": 100, - }, - ) - assert res.slots_available == 0 - assert res.max_gpu_slots_capacity == 0 - assert res.estimated_gpu_slots_used == 0 - assert res.max_gpu_slots_capacity_enabled == 0 - - -def test_calculate_slots_on_hv_gpu_no_gpunum(): - """ - tests calculate_slots_on_hv when provided a gpu flavor but gpus_required is set to 0 - should raise error - """ - with pytest.raises(RuntimeError): - calculate_slots_on_hv( - # g- specifies gpu flavor - "g-flavor1", - {"gpus_required": 0, "cores_required": 10, "mem_required": 10}, - { - "compute_service_status": "disabled", - # can fit 10 slots, but should be 0 since compute service disabled - "vcpus_available": 100, - "mem_available": 100, - }, - ) - - -def test_calculate_slots_on_hv_gpu_disabled(): - """ - tests calculate_slots_on_hv calculates slots properly for gpu flavor - - should return 0s since hv is disabled, but keep track of max gpu slots capacity - """ - - res = calculate_slots_on_hv( - # g- specifies gpu flavor - "g-flavor1", - {"gpus_required": 1, "cores_required": 10, "mem_required": 10}, - { - "compute_service_status": "disabled", - # can fit 10 slots, but should be 0 since compute service disabled - "vcpus_available": 100, - "mem_available": 100, - "vcpus_capacity": 100, - "mem_capacity": 100, - "gpu_capacity": 10, - }, - ) - assert res.slots_available == 0 - # still want capacity to be updated - assert res.max_gpu_slots_capacity == 10 - assert res.estimated_gpu_slots_used == 0 - assert res.max_gpu_slots_capacity_enabled == 0 - - -def test_calculate_slots_on_hv_mem_available_max(): - """ - tests calculate_slots_on_hv calculates slots properly for non-gpu flavor - - where memory available is limiting factor - """ - - res = calculate_slots_on_hv( - "flavor1", - {"cores_required": 10, "mem_required": 10}, - { - "compute_service_status": "enabled", - "vcpus_available": 100, - # can fit only one slot - "mem_available": 10, - }, - ) - assert res.slots_available == 1 - assert res.max_gpu_slots_capacity == 0 - assert res.estimated_gpu_slots_used == 0 - assert res.max_gpu_slots_capacity_enabled == 0 - - -def test_calculate_slots_on_hv_cores_available_max(): - """ - tests calculate_slots_on_hv calculates slots properly for non-gpu flavor - - where cores available is limiting factor - """ - res = calculate_slots_on_hv( - "flavor1", - {"cores_required": 10, "mem_required": 10}, - { - "compute_service_status": "enabled", - # can fit 10 cpu slots - "vcpus_available": 100, - "mem_available": 1000, - }, - ) - assert res.slots_available == 10 - assert res.max_gpu_slots_capacity == 0 - assert res.estimated_gpu_slots_used == 0 - assert res.max_gpu_slots_capacity_enabled == 0 - - -def test_calculate_slots_on_hv_gpu_available_max(): - """ - tests calculate_slots_on_hv calculates slots properly for gpu flavor - - where gpus available is limiting factor - """ - res = calculate_slots_on_hv( - # specifies a gpu flavor - "g-flavor1", - {"gpus_required": 1, "cores_required": 10, "mem_required": 10}, - { - "compute_service_status": "enabled", - # should find only 5 slots available since gpus are the limiting factor - "gpu_capacity": 5, - "vcpus_available": 100, - "mem_available": 100, - "vcpus_capacity": 100, - "mem_capacity": 100, - }, - ) - assert res.slots_available == 5 - assert res.max_gpu_slots_capacity == 5 - assert res.estimated_gpu_slots_used == 0 - assert res.max_gpu_slots_capacity_enabled == 5 - - -def test_calculate_slots_on_hv_gpu_max_slots_calculated_properly(): - """ - tests calculate_slots_on_hv calculates max slots properly for gpu flavor - """ - res = calculate_slots_on_hv( - # specifies a gpu flavor - "g-flavor1", - {"gpus_required": 2, "cores_required": 10, "mem_required": 10}, - { - "compute_service_status": "enabled", - # should find 3 slots since we require 2 gpus for each slot - "gpu_capacity": 6, - "vcpus_available": 100, - "mem_available": 100, - "vcpus_capacity": 100, - "mem_capacity": 100, - }, - ) - assert res.slots_available == 3 - assert res.max_gpu_slots_capacity == 3 - assert res.estimated_gpu_slots_used == 0 - assert res.max_gpu_slots_capacity_enabled == 3 - - -def test_calculate_slots_on_hv_calculates_used_gpu_capacity(): - """ - tests calculate_slots_on_hv calculates slots properly for gpu flavor - - should calculate estimated used gpus slots properly - """ - res = calculate_slots_on_hv( - # specifies a gpu flavor - "g-flavor1", - {"gpus_required": 1, "cores_required": 10, "mem_required": 10}, - { - "compute_service_status": "enabled", - # should find only 5 slots available since gpus are the limiting factor - "gpu_capacity": 5, - "vcpus_available": 10, - "mem_available": 10, - # there's 4 flavor slots that could have already been used - "vcpus_capacity": 50, - "mem_capacity": 50, - }, - ) - assert res.slots_available == 1 - assert res.max_gpu_slots_capacity == 5 - assert res.estimated_gpu_slots_used == 4 - assert res.max_gpu_slots_capacity_enabled == 5 - - -@patch("slottifier.openstack") -@patch("slottifier.HypervisorQuery") -def test_get_openstack_resources(mock_hypervisor_query, mock_openstack): # do I use self? - """ - tests get_openstack_resources gets all required resources via openstacksdk - and the query library and outputs them properly - """ - mock_conn = mock_openstack.connect.return_value - - #Run the mock queries - mock_hv = mock_hypervisor_query.return_value - - # Create a mock hv_props dictionary. - mock_hv.to_props.return_value = { - 'hypervisor1': {'id': [1], 'name': ['hv1']}, - 'hypervisor2': {'id': [2], 'name': ['hv2']} - } - - mock_conn.compute.aggregates.return_value = [{"name": "ag1", "id": 2}] - mock_conn.compute.services.return_value = [{"name": "svc1", "id": 3}] - mock_conn.compute.flavors.return_value = [{"name": "flv1", "id": 4}] - - mock_instance = NonCallableMock() - res = get_openstack_resources(mock_instance) - - mock_hv.select_all.assert_called_once() - mock_hv.run.assert_called_once_with(mock_instance) - mock_hv.group_by.assert_called_once_with('id') - - mock_openstack.connect.assert_called_once_with(cloud=mock_instance) - mock_conn.compute.services.assert_called_once() - mock_conn.compute.aggregates.assert_called_once() - mock_conn.compute.flavors.assert_called_once_with(get_extra_specs=True) - - assert res == { - "compute_services": [{"name": "svc1", "id": 3}], - "aggregates": [{"name": "ag1", "id": 2}], - "hypervisors": [{"name": "hv1", "id": 1}, {"name": "hv2", "id": 2}], - "flavors": [{"name": "flv1", "id": 4}], - } - - -@patch("slottifier.get_hv_info") -def test_get_all_hv_info_for_aggregate_with_valid_data( - mock_get_hv_info, mock_hypervisors, mock_compute_services -): - """ - Tests get_all_hv_info_for_aggregate with valid data. - should call get_hv_info with correct hv and service object that match aggregate and - add results to list - """ - mock_aggregate = {"hosts": ["hv1", "hv2"]} - res = get_all_hv_info_for_aggregate( - mock_aggregate, mock_compute_services.values(), mock_hypervisors.values() - ) - mock_get_hv_info.assert_has_calls( - [ - # svc1 holds host: hv1 - call( - mock_hypervisors["hv1"], mock_aggregate, mock_compute_services["svc1"] - ), - # svc2 holds host: hv2 - call( - mock_hypervisors["hv2"], mock_aggregate, mock_compute_services["svc2"] - ), - ] - ) - assert res == [mock_get_hv_info.return_value, mock_get_hv_info.return_value] - - -def test_get_all_hv_info_for_aggregate_with_invalid_data( - mock_hypervisors, mock_compute_services -): - """ - Tests get_all_hv_info_for_aggregate with invalid data. - should not add hv with invalid data to the resulting list - """ - mock_aggregate = { - "hosts": [ - # hvFoo has service but not found in list of hvs - "hvFoo", - # hvBar has no service and not in list of hvs - "hvBar", - ] - } - assert not ( - get_all_hv_info_for_aggregate( - mock_aggregate, mock_compute_services.values(), mock_hypervisors.values() - ) - ) - - -def test_get_all_hv_info_for_aggregate_with_empty_aggregate( - mock_hypervisors, mock_compute_services -): - """ - Tests get_all_hv_info_for_aggregate with aggregate with no hosts. - should do nothing and return empty list - """ - mock_aggregate = {"hosts": []} - assert not ( - get_all_hv_info_for_aggregate( - mock_aggregate, mock_hypervisors.values(), mock_compute_services.values() - ) - ) - - -@patch("slottifier.get_flavor_requirements") -@patch("slottifier.calculate_slots_on_hv") -def test_update_slots_one_flavor_one_hv( - mock_calculate_slots_on_hv, mock_get_flavor_requirements -): - """ - Tests update_slots with one flavor and one hv. - should call calculate_slots_on_hv once with the given flavor and hv - """ - mock_flavor = {"name": "flv1"} - mock_host = NonCallableMock() - - slots_dict = {"flv1": 1} - mock_calculate_slots_on_hv.return_value = 1 - res = update_slots([mock_flavor], [mock_host], slots_dict=slots_dict) - mock_get_flavor_requirements.assert_called_once_with(mock_flavor) - mock_calculate_slots_on_hv.assert_called_once_with( - "flv1", mock_get_flavor_requirements.return_value, mock_host - ) - assert res == {"flv1": 2} - - -@patch("slottifier.get_flavor_requirements") -@patch("slottifier.calculate_slots_on_hv") -def test_update_slots_one_flavor_multi_hv( - mock_calculate_slots_on_hv, mock_get_flavor_requirements -): - """ - Tests update_slots with one flavor and multiple hvs. - should call calculate_slots_on_hv on each hv with the same flavor - """ - mock_flavor = {"name": "flv1"} - mock_host_1 = NonCallableMock() - mock_host_2 = NonCallableMock() - slots_dict = {"flv1": 1} - mock_calculate_slots_on_hv.side_effect = [1, 2] - res = update_slots([mock_flavor], [mock_host_1, mock_host_2], slots_dict=slots_dict) - mock_get_flavor_requirements.assert_called_once_with(mock_flavor) - mock_calculate_slots_on_hv.assert_has_calls( - [ - call("flv1", mock_get_flavor_requirements.return_value, mock_host_1), - call("flv1", mock_get_flavor_requirements.return_value, mock_host_2), - ] - ) - assert res == {"flv1": 4} - - -@patch("slottifier.get_flavor_requirements") -@patch("slottifier.calculate_slots_on_hv") -def test_update_slots_multi_flavor_multi_hv( - mock_calculate_slots_on_hv, mock_get_flavor_requirements -): - """ - Tests update_slots with multiple flavors and multiple hvs. - should call calculate_slots_on_hv with each unique hv-flavor pairings - """ - mock_flavor_1 = {"name": "flv1"} - mock_flavor_2 = {"name": "flv2"} - mock_host_1 = NonCallableMock() - mock_host_2 = NonCallableMock() - slots_dict = {"flv1": 1, "flv2": 0} - mock_calculate_slots_on_hv.side_effect = [1, 2, 0, 0] - res = update_slots( - [mock_flavor_1, mock_flavor_2], - [mock_host_1, mock_host_2], - slots_dict=slots_dict, - ) - mock_get_flavor_requirements.assert_has_calls( - [call(mock_flavor_1), call(mock_flavor_2)] - ) - mock_calculate_slots_on_hv.assert_has_calls( - [ - call("flv1", mock_get_flavor_requirements.return_value, mock_host_1), - call("flv1", mock_get_flavor_requirements.return_value, mock_host_2), - call("flv2", mock_get_flavor_requirements.return_value, mock_host_1), - call("flv2", mock_get_flavor_requirements.return_value, mock_host_2), - ] - ) - assert res == {"flv1": 4, "flv2": 0} - - -@patch("slottifier.get_openstack_resources") -@patch("slottifier.get_valid_flavors_for_aggregate") -@patch("slottifier.get_all_hv_info_for_aggregate") -@patch("slottifier.update_slots") -@patch("slottifier.convert_to_data_string") -def test_get_slottifier_details_one_aggregate( - mock_convert_to_data_string, - mock_update_slots, - mock_get_all_hv_info_for_aggregate, - mock_get_valid_flavors_for_aggregate, - mock_get_openstack_resources, -): - """ - Tests get_slottifier_details with one aggregate. - """ - mock_instance = NonCallableMock() - mock_flavors = [{"name": "flv1"}, {"name": "flv2"}] - mock_compute_services = NonCallableMock() - mock_hypervisors = NonCallableMock() - - mock_get_openstack_resources.return_value = { - "aggregates": ["ag1"], - "flavors": mock_flavors, - "compute_services": mock_compute_services, - "hypervisors": mock_hypervisors, - } - res = get_slottifier_details(mock_instance) - mock_get_openstack_resources.assert_called_once_with(mock_instance) - mock_get_valid_flavors_for_aggregate.assert_called_once_with(mock_flavors, "ag1") - mock_get_all_hv_info_for_aggregate.assert_called_once_with( - "ag1", mock_compute_services, mock_hypervisors - ) - - mock_update_slots.assert_called_once_with( - mock_get_valid_flavors_for_aggregate.return_value, - mock_get_all_hv_info_for_aggregate.return_value, - {"flv1": SlottifierEntry(), "flv2": SlottifierEntry()}, - ) - - mock_convert_to_data_string.assert_called_once_with( - mock_instance, mock_update_slots.return_value - ) - assert res == mock_convert_to_data_string.return_value - - -@patch("slottifier.run_scrape") -@patch("slottifier.parse_args") -def test_main(mock_parse_args, mock_run_scrape): - """ - tests main function calls run_scrape utility function properly - """ - mock_user_args = NonCallableMock() - main(mock_user_args) - mock_run_scrape.assert_called_once_with( - mock_parse_args.return_value, get_slottifier_details - ) - mock_parse_args.assert_called_once_with( - mock_user_args, description="Get All Service Statuses" - ) diff --git a/MonitoringTools/tests/test_slottifier_entry.py b/MonitoringTools/tests/test_slottifier_entry.py deleted file mode 100644 index 35261a52..00000000 --- a/MonitoringTools/tests/test_slottifier_entry.py +++ /dev/null @@ -1,27 +0,0 @@ -from slottifier_entry import SlottifierEntry - - -def test_add(): - """ - test that adding two SlottifierEntry dataclasses works properly - """ - fst = SlottifierEntry( - slots_available=1, - estimated_gpu_slots_used=1, - max_gpu_slots_capacity=1, - max_gpu_slots_capacity_enabled=1, - ) - - snd = SlottifierEntry( - slots_available=2, - estimated_gpu_slots_used=3, - max_gpu_slots_capacity=4, - max_gpu_slots_capacity_enabled=5, - ) - - assert fst + snd == SlottifierEntry( - slots_available=3, - estimated_gpu_slots_used=4, - max_gpu_slots_capacity=5, - max_gpu_slots_capacity_enabled=6, - ) diff --git a/MonitoringTools/usr/local/bin/collect_vm_stats.py b/MonitoringTools/usr/local/bin/collect_vm_stats.py deleted file mode 100644 index d800e8eb..00000000 --- a/MonitoringTools/usr/local/bin/collect_vm_stats.py +++ /dev/null @@ -1,173 +0,0 @@ -import sys -from typing import List, Dict, Optional - -from openstack import connect -from send_metric_utils import run_scrape, parse_args - - -def server_obj_to_len(server_obj) -> int: - """ - Method that gets the length of a generator object - :param server_obj: OpenStack generator object from a query - :return: Integer for the length of the object i.e. number of results - """ - generator_list = list(server_obj) - total_results = len(generator_list) - return total_results - - -def run_server_query( - conn: connect, - filters: Optional[Dict], - page_size: int = 1000, - call_limit: int = 1000, -) -> List: - """ - Helper method for running server query using pagination - openstacksdk calls - can only return a maximum number of values - (set by limit) and to continue getting values - we need to run another call pass a "marker" value of the last - item seen - :param conn: OpenStack cloud connection - :param filters: A dictionary of filters to run on the query (server-side) - :param page_size: (Default 1000) how many items are returned by single call - :param call_limit: (Default 1000) max number of paging iterations. - - this is required to mitigate some bugs where successive paging loops back on itself - leading to endless calls - :return: A list of server objects - """ - - pagination_filters = {"limit": page_size, "marker": None} - if not filters: - filters = {} - - new_filters = {**filters, **pagination_filters} - query_res = [] - - curr_marker = None - num_calls = 0 - while True: - if num_calls > call_limit: - break - - for i, server in enumerate( - conn.compute.servers(details=False, all_projects=True, **new_filters) - ): - query_res.append(server) - - # openstacksdk calls break after going over pagination limit - if i == page_size - 1: - # restart the for loop with marker set - new_filters.update({"marker": server["id"]}) - break - - # if marker hasn't changed, then has query terminated - if new_filters["marker"] == curr_marker: - break - - # set marker as current - curr_marker = new_filters["marker"] - num_calls += 1 - return query_res - - -def number_servers_total(conn: connect) -> int: - """ - Query an OpenStack Cloud to find the total number of instances across - all projects. - :param conn: OpenStack cloud connection - :returns: Number of VMs in total across the cloud - """ - server_obj = run_server_query(conn, None) - # get number of items in generator object - total_instances = server_obj_to_len(server_obj) - return total_instances - - -def number_servers_active(conn: connect) -> int: - """ - Query an OpenStack Cloud to find the number of instances in - ACTIVE state. - :param conn: OpenStack Cloud Connection - :returns: Number of active VMs - """ - server_obj = run_server_query(conn, {"status": "ACTIVE"}) - # get number of items in generator object - instance_active = server_obj_to_len(server_obj) - return instance_active - - -def number_servers_build(conn: connect) -> int: - """ - Query an OpenStack Cloud to find the number of instances in - BUILD state. - :param conn: OpenStack Cloud Connection - :returns: Number of VMs in BUILD state - """ - server_obj = run_server_query(conn, {"status": "BUILD"}) - # get number of items in generator object - instance_build = server_obj_to_len(server_obj) - return instance_build - - -def number_servers_error(conn: connect) -> int: - """ - Query an OpenStack Cloud to find the number of instances in - ERROR state. - :param conn: OpenStack Cloud Connection - :returns: Number of VMs in ERROR state - """ - server_obj = run_server_query(conn, {"status": "ERROR"}) - # get number of items in generator object - instance_err = server_obj_to_len(server_obj) - return instance_err - - -def number_servers_shutoff(conn: connect) -> int: - """ - Query an OpenStack Cloud to find the number of instances in - SHUTOFF state. - :param conn: OpenStack Cloud Connection - :returns: Number of VMs in SHUTOFF (STOPPED) state - """ - server_obj = run_server_query(conn, {"status": "SHUTOFF"}) - # get number of items in generator object - instance_shutoff = server_obj_to_len(server_obj) - return instance_shutoff - - -def get_all_server_statuses(cloud_name: str) -> str: - """ - Collects the stats for vms and returns a dict - :param cloud_name: Name of OpenStack cloud to connect to - :return: A comma separated string containing VM states. - """ - - # connect to an OpenStack cloud - conn = connect(cloud=cloud_name) - # collect stats in order: total, active, build, error, shutoff - total_vms = number_servers_total(conn) - active_vms = number_servers_active(conn) - build_vms = number_servers_build(conn) - error_vms = number_servers_error(conn) - shutoff_vms = number_servers_shutoff(conn) - - server_statuses = ( - f"VMStats,instance={cloud_name.capitalize()} " - f"totalVM={total_vms}i,activeVM={active_vms}i," - f"buildVM={build_vms}i,errorVM={error_vms}i," - f"shutoffVM={shutoff_vms}i" - ) - - return server_statuses - - -def main(user_args: List): - """ - Main method to collect server statuses for an influxDB instance - """ - influxdb_args = parse_args(user_args, description="Get All VM Statuses") - run_scrape(influxdb_args, get_all_server_statuses) - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/MonitoringTools/usr/local/bin/influxdb.conf b/MonitoringTools/usr/local/bin/influxdb.conf deleted file mode 100644 index 41b14cdd..00000000 --- a/MonitoringTools/usr/local/bin/influxdb.conf +++ /dev/null @@ -1,12 +0,0 @@ -[auth] -# auth for influxdb -password=admin -username=admin - -[cloud] -# requires /etc/openstack/clouds.yaml with "prod" cloud account -instance=prod - -[db] -database=cloud -host=localhost:8086 diff --git a/MonitoringTools/usr/local/bin/limits_to_influx.py b/MonitoringTools/usr/local/bin/limits_to_influx.py deleted file mode 100755 index 886bd477..00000000 --- a/MonitoringTools/usr/local/bin/limits_to_influx.py +++ /dev/null @@ -1,129 +0,0 @@ -import sys -from typing import Dict, List -import openstack -from openstack.identity.v3.project import Project -from send_metric_utils import run_scrape, parse_args - - -def convert_to_data_string(instance: str, limit_details: Dict) -> str: - """ - converts a dictionary of values into a data-string influxdb can read - :param instance: which cloud the info was scraped from (prod or dev) - :param limit_details: a dictionary of values to convert to string - :return: a comma-separated string of key=value taken from input dictionary - """ - data_string = "" - for project_name, limit_entry in limit_details.items(): - parsed_project_name = project_name.replace(" ", "\ ") - data_string += ( - f'Limits,Project="{parsed_project_name}",' - f"instance={instance.capitalize()} " - f"{get_limit_prop_string(limit_entry)}\n" - ) - return data_string - - -def get_limit_prop_string(limit_details): - """ - This function is a helper function that creates a partial data string of just the - properties scraped for a single service - :param limit_details: properties scraped for a single project - :return: a data string of scraped info - """ - # all limit properties are integers so add 'i' for each value - limit_strings = [] - for limit, val in limit_details.items(): - limit_strings.append(f"{limit}={val}i") - return ",".join(limit_strings) - - -def extract_limits(limits_dict) -> Dict: - """ - helper function to get info from - :param limits_dict: a dictionary of project limits to extract useful properties from - :return: a dictionary of useful properties with keys that match expected keys in influxdb - """ - # the keys need changing to match legacy data when we used the openstack-cli - mappings = { - "server_meta": "maxServerMeta", - "personality": "maxPersonality", - "server_groups_used": "totalServerGroupsUsed", - "image_meta": "maxImageMeta", - "personality_size": "maxPersonalitySize", - "keypairs": "maxTotalKeypairs", - "security_group_rules": "maxSecurityGroupRules", - "server_groups": "maxServerGroups", - "total_cores_used": "totalCoresUsed", - "total_ram_used": "totalRAMUsed", - "instances_used": "totalInstancesUsed", - "security_groups": "maxSecurityGroups", - "floating_ips_used": "totalFloatingIpsUsed", - "total_cores": "maxTotalCores", - "server_group_members": "maxServerGroupMembers", - "floating_ips": "maxTotalFloatingIps", - "security_groups_used": "totalSecurityGroupsUsed", - "instances": "maxTotalInstances", - "total_ram": "maxTotalRAMSize", - } - parsed_limits = {} - for key, val in mappings.items(): - try: - parsed_limits[val] = limits_dict[key] - except KeyError as exp: - raise RuntimeError(f"could not find {key} in project limits") from exp - return parsed_limits - - -def get_limits_for_project(instance: str, project_id) -> Dict: - """ - Get limits for a project. This is currently using openstack-cli - This will be rewritten to instead use openstacksdk - :param instance: cloud we want to scrape from - :param project_id: project id we want to collect limits for - :return: a set of limit properties for project we want - """ - conn = openstack.connect(instance) - project_details = { - **extract_limits(conn.get_compute_limits(project_id)), - **conn.get_volume_limits(project_id)["absolute"], - } - return project_details - - -def is_valid_project(project: Project) -> bool: - """ - helper function which returns if project is valid to get limits for - :param project: project to check - :return: boolean, True if project should be accounted for in limits - """ - invalid_strings = ["_rally", "844"] - return all(string not in project["name"] for string in invalid_strings) - - -def get_all_limits(instance: str) -> str: - """ - This function gets limits for each project on openstack - :param instance: which cloud to scrape from (prod or dev) - :return: A data string of scraped info - """ - conn = openstack.connect(cloud=instance) - limit_details = {} - for project in conn.list_projects(): - if is_valid_project(project): - limit_details[project["name"]] = get_limits_for_project( - instance, project["id"] - ) - return convert_to_data_string(instance, limit_details) - - -def main(user_args: List): - """ - send limits to influx - :param user_args: args passed into script by user - """ - influxdb_args = parse_args(user_args, description="Get All Project Limits") - run_scrape(influxdb_args, get_all_limits) - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/MonitoringTools/usr/local/bin/send_metric_utils.py b/MonitoringTools/usr/local/bin/send_metric_utils.py deleted file mode 100644 index 531195f9..00000000 --- a/MonitoringTools/usr/local/bin/send_metric_utils.py +++ /dev/null @@ -1,94 +0,0 @@ -import configparser -from configparser import ConfigParser -from typing import Dict, Tuple, Callable -from pathlib import Path -import argparse -import requests - - -def read_config_file(config_filepath: Path) -> Dict: - """ - This function reads a config file and puts it into a dictionary - :param config_filepath: - :return: A flattened dictionary containing key-value pairs from config file - """ - config = ConfigParser() - config.read(config_filepath) - config_dict = {} - for section in config.sections(): - for key, value in config.items(section): - config_dict[f"{section}.{key}"] = value - - required_values = [ - "auth.password", - "auth.username", - "cloud.instance", - "db.database", - "db.host", - ] - assert all( - val in config_dict for val in required_values - ), "Config file is missing required values." - return config_dict - - -def post_to_influxdb( - data_string: str, host: str, db_name: str, auth: Tuple[str, str] -) -> None: - """ - This function posts information to influxdb - :param data_string: data to write - :param host: hostname and port where influxdb can be accessed - :param db_name: database name to write to - :param auth: tuple of (username, password) to authenticate with influxdb - """ - if not data_string: - return - - url = f"http://{host}/write?db={db_name}&precision=s" - response = requests.post(url, data=data_string, auth=auth, timeout=60) - response.raise_for_status() - - -def parse_args(inp_args, description: str = "scrape metrics script") -> Dict: - """ - This function parses influxdb args from a filepath passed into script when its run. - The only thing the scripts takes as input is the path to the config file. - :param description: The description of the script to print on help command - :param inp_args: input arguments passed when a 'gather metrics' script is run - :return: args from - """ - - parser = argparse.ArgumentParser(description=description) - parser.add_argument( - "config_filepath", type=Path, help="Path to influxdb config file" - ) - try: - args = parser.parse_args(inp_args) - except argparse.ArgumentTypeError as exp: - raise RuntimeError("Error reading input arguments") from exp - - if not args.config_filepath.is_file(): - raise RuntimeError(f"Invalid filepath given '{args.config_filepath}'") - - try: - return read_config_file(args.config_filepath) - except configparser.Error as exp: - raise RuntimeError( - f"could not read influxdb config file '{args.config_filepath}'" - ) from exp - - -def run_scrape(influxdb_args, scrape_func: Callable[[str], str]): - """ - run script to scrape info and post to influxdb - :param influxdb_args: set of args passed in by user upon running script - :param scrape_func: function to use to scrape info - """ - scrape_res = scrape_func(influxdb_args["cloud.instance"]) - post_to_influxdb( - scrape_res, - host=influxdb_args["db.host"], - db_name=influxdb_args["db.database"], - auth=(influxdb_args["auth.username"], influxdb_args["auth.password"]), - ) diff --git a/MonitoringTools/usr/local/bin/service_status_to_influx.py b/MonitoringTools/usr/local/bin/service_status_to_influx.py deleted file mode 100644 index 25c433cc..00000000 --- a/MonitoringTools/usr/local/bin/service_status_to_influx.py +++ /dev/null @@ -1,206 +0,0 @@ -import sys -from typing import Dict, List -import openstack -from openstack.compute.v2.hypervisor import Hypervisor -from openstack.compute.v2.service import Service -from openstack.network.v2.agent import Agent -from send_metric_utils import run_scrape, parse_args - - -def get_hypervisor_properties(hypervisor: Hypervisor) -> Dict: - """ - This function parses a openstacksdk Hypervisor object to get properties in the correct format - to feed into influxdb - :param hypervisor: hypervisor to extract properties from - :return: A dictionary of useful properties - """ - hv_prop_dict = { - "hv": { - # this is populated by another command - "aggregate": "no-aggregate", - "memorymax": hypervisor["memory_size"], - "memoryused": hypervisor["memory_used"], - "memoryavailable": hypervisor["memory_size"] - hypervisor["memory_used"], - "memperc": round((hypervisor["memory_used"] / hypervisor["memory_size"]) * 100), - "cpumax": hypervisor["vcpus"], - "cpuused": hypervisor["vcpus_used"], - "cpuavailable": hypervisor["vcpus"] - hypervisor["vcpus_used"], - "cpuperc": round((hypervisor["vcpus_used"] / hypervisor["vcpus"]) * 100), - "agent": 1, - "state": 1 if hypervisor["state"] == "up" else 0, - "statetext": hypervisor["state"].capitalize(), - } - } - hv_info = hv_prop_dict["hv"] - - hv_info["utilperc"] = max(hv_info["cpuperc"], hv_info["memperc"]) - hv_info["cpufull"] = 1 if hv_info["cpuperc"] >= 97 else 0 - hv_info["memfull"] = 1 if hv_info["memoryavailable"] <= 8192 else 0 - hv_info["full"] = int(hv_info["memfull"] or hv_info["cpufull"]) - - return hv_prop_dict - - -def get_service_properties(service: Service) -> Dict: - """ - This function parses a openstacksdk Service object to get properties in the correct format - to feed into influxdb - :param service: service to extract properties from - :return: A dictionary of useful properties - """ - service_prop_dict = { - service["binary"]: { - "agent": 1, - "status": 1 if service["status"] == "enabled" else 0, - "statustext": service["status"].capitalize(), - "state": 1 if service["state"] == "up" else 0, - "statetext": service["state"].capitalize(), - } - } - return service_prop_dict - - -def get_agent_properties(agent: Agent) -> Dict: - """ - This function parses a openstacksdk Agent object to get properties in the correct format - to feed into influxdb - :param agent: agent to extract properties from - :return: A dictionary of useful properties - """ - agent_prop_dict = { - agent["binary"]: { - "agent": 1, - "state": 1 if agent["is_alive"] else 0, - "statetext": "Up" if agent["is_alive"] else "Down", - "status": 1 if agent["is_admin_state_up"] else 0, - "statustext": "Enabled" if agent["is_admin_state_up"] else "Disabled", - } - } - return agent_prop_dict - - -def convert_to_data_string(instance: str, service_details: Dict) -> str: - """ - This function creates a data string from service properties to feed into influxdb - :param instance: the cloud instance (prod or dev) that details were scraped from - :param service_details: a set of service properties to parse - :return: A data string of scraped info - """ - data_string = "" - for hypervisor_name, services in service_details.items(): - for service_binary, service_stats in services.items(): - statustext = service_stats.pop("statustext") - statetext = service_stats.pop("statetext") - new_data_string = ( - f'ServiceStatus' - f',host="{hypervisor_name}"' - f',service="{service_binary}"' - f',instance={instance.capitalize()}' - f',statetext="{statetext}"' - f',statustext="{statustext}"' - ) - - aggregate = service_stats.pop("aggregate", None) - if aggregate: - new_data_string += f',aggregate="{aggregate}"' - - new_data_string += f" {get_service_prop_string(service_stats)}\n" - data_string += new_data_string - - return data_string - - -def get_service_prop_string(service_dict: Dict) -> str: - """ - This function is a helper function that creates a partial data string of just the - properties scraped for a single service - :param service_dict: properties scraped for a single service - :return: a data string of scraped info - """ - stats_strings = [] - for stat, val in service_dict.items(): - stats_strings.append(f"{stat}={val}i") - return ",".join(stats_strings) - - -def get_all_hv_details(conn) -> Dict: - """ - Get all hypervisor status information from openstack - :param conn: openstack connection object - :return: a dictionary of hypervisor status information - """ - hv_details = {} - for hypervisor in conn.list_hypervisors(): - hv_details[hypervisor["name"]] = get_hypervisor_properties(hypervisor) - - # populate found hypervisors with what aggregate they belong to - so we can filter by aggregate in grafana - for aggregate in conn.compute.aggregates(): - for host_name in aggregate["hosts"]: - if host_name in hv_details: - hv_details[host_name]["hv"]["aggregate"] = aggregate["name"] - return hv_details - - -def update_with_service_statuses(conn, status_details: Dict) -> Dict: - """ - update status details with service status information from openstack - :param conn: openstack connection object - :param status_details: status details dictionary to update - :return: a dictionary of updated status information with service statuses - """ - for service in conn.compute.services(): - if service["host"] not in status_details.keys(): - status_details[service["host"]] = {} - - service_host = status_details[service["host"]] - service_host.update(get_service_properties(service)) - if "hv" in service_host and service["binary"] == "nova-compute": - service_host["hv"]["status"] = service_host["nova-compute"]["status"] - service_host["hv"]["statustext"] = service_host["nova-compute"][ - "statustext" - ] - - return status_details - - -def update_with_agent_statuses(conn, status_details: Dict) -> Dict: - """ - update status details with network agent status information from openstack - :param conn: openstack connection object - :param status_details: status details dictionary to update - :return: a dictionary of updated status information with network agent statuses - """ - for agent in conn.network.agents(): - if agent["host"] not in status_details.keys(): - status_details[agent["host"]] = {} - - status_details[agent["host"]].update(get_agent_properties(agent)) - - return status_details - - -def get_all_service_statuses(instance: str) -> str: - """ - This function gets status information for each service node, hypervisor and network - agent in openstack. - :param instance: which cloud to scrape from (prod or dev) - :return: A data string of scraped info - """ - conn = openstack.connect(instance) - all_details = get_all_hv_details(conn) - all_details = update_with_service_statuses(conn, all_details) - all_details = update_with_agent_statuses(conn, all_details) - return convert_to_data_string(instance, all_details) - - -def main(user_args: List): - """ - send service status info to influx - :param user_args: args passed into script by user - """ - influxdb_args = parse_args(user_args, description="Get All Service Statuses") - run_scrape(influxdb_args, get_all_service_statuses) - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/MonitoringTools/usr/local/bin/slottifier.py b/MonitoringTools/usr/local/bin/slottifier.py deleted file mode 100644 index dd5a682f..00000000 --- a/MonitoringTools/usr/local/bin/slottifier.py +++ /dev/null @@ -1,356 +0,0 @@ -import sys -from typing import List, Dict -import openstack -from slottifier_entry import SlottifierEntry -from send_metric_utils import parse_args, run_scrape -from openstackquery import HypervisorQuery - - -def get_hv_info(hypervisor: Dict, aggregate_info: Dict, service_info: Dict) -> Dict: - """ - Helper function to get hv information on vcpus/memory available - :param hypervisor: a dictionary holding info on hypervisor - :param aggregate_info: a dictionary holding info on aggregate hypervisor belongs to - :param service_info: a dictionary holding info on nova compute service running on hypervisor - :return: a dictionary of vcpus/memory available for given hv - """ - hv_info = { - "vcpus_available": 0, - "mem_available": 0, - "gpu_capacity": 0, - "vcpus_capacity": 0, - "mem_capacity": 0, - "compute_service_status": "disabled", - } - - vcpus = hypervisor.get("hypervisor_vcpus", "0") - vcpus_used = hypervisor.get("hypervisor_vcpus_used", "0") - - # Only convert to int if the value is not 'Not Found' - if vcpus != "Not Found": - vcpus = int(vcpus) - else: - vcpus = 0 - - if vcpus_used != "Not Found": - vcpus_used = int(vcpus_used) - else: - vcpus_used = 0 - - memory_size = hypervisor.get("hypervisor_memory_size", "0") - memory_used = hypervisor.get("hypervisor_memory_used", "0") - - if memory_size != "Not Found": - memory_size = int(memory_size) - else: - memory_size = 0 - - if memory_used != "Not Found": - memory_used = int(memory_used) - else: - memory_used = 0 - - if hypervisor and hypervisor.get("hypervisor_status") != "disabled": - hv_info["vcpus_available"] = max(0, vcpus - vcpus_used) - hv_info["mem_available"] = max(0, memory_size - memory_used) - hv_info["vcpus_capacity"] = vcpus - hv_info["mem_capacity"] = memory_size - - hv_info["gpu_capacity"] = int(aggregate_info["metadata"].get("gpunum", "0")) - hv_info["compute_service_status"] = service_info["status"] - - return hv_info - -def get_flavor_requirements(flavor: Dict) -> Dict: - """ - Helper function to get flavor memory/ram/gpu requirements for a VM of that type to be built on a hv - :param flavor: flavor to get requirements from - :return: dictionary of requirements - """ - try: - flavor_reqs = { - "cores_required": int(flavor["vcpus"]), - "mem_required": int(flavor["ram"]), - } - except (ValueError, KeyError) as exp: - flavor_name = flavor.get("name", "Name Not Found") - raise RuntimeError( - f"could not get flavor requirements for flavor {flavor_name}" - ) from exp - - flavor_reqs.update( - { - "gpus_required": int( - flavor.get("extra_specs", {}).get("accounting:gpu_num", 0) - ), - } - ) - return flavor_reqs - - -def get_valid_flavors_for_aggregate(flavor_list: List, aggregate: Dict) -> List: - """ - Helper function that filters a list of flavors - to find those that can be built on a hv belonging to a given aggregate - :param flavor_list: a list of flavors to check - :param aggregate: specifies the aggregate to find compatible flavors for - :return: a list of valid flavors for hosttype - """ - valid_flavors = [] - hypervisor_hosttype = aggregate["metadata"].get("hosttype", None) - hypervisor_storage_type = aggregate["metadata"].get("local-storage-type", None) - - if not hypervisor_hosttype: - return valid_flavors - - for flavor in flavor_list: - # validate that flavor can be used on host aggregate - if ( - "aggregate_instance_extra_specs:hosttype" - not in flavor["extra_specs"].keys() - ): - continue - if ( - flavor["extra_specs"]["aggregate_instance_extra_specs:hosttype"] - != hypervisor_hosttype - ): - continue - - has_local_storage = ( - "aggregate_instance_extra_specs:local-storage-type" - in flavor["extra_specs"].keys() - ) - - if ( - has_local_storage - and flavor["extra_specs"][ - "aggregate_instance_extra_specs:local-storage-type" - ] - != hypervisor_storage_type - ): - continue - - valid_flavors.append(flavor) - return valid_flavors - - -def convert_to_data_string(instance: str, slots_dict: Dict) -> str: - """ - converts a dictionary of values into a data-string influxdb can read - :param slots_dict: a dictionary of slots available for each flavor - :param instance: which cloud the info was scraped from (prod or dev) - :return: a comma-separated string of key=value taken from input dictionary - """ - data_string = "" - for flavor, slot_info in slots_dict.items(): - data_string += ( - f"SlotsAvailable,instance={instance.capitalize()},flavor={flavor}" - f" SlotsAvailable={slot_info.slots_available}i" - f",maxSlotsAvailable={slot_info.max_gpu_slots_capacity}i" - f",usedSlots={slot_info.estimated_gpu_slots_used}i" - f",enabledSlots={slot_info.max_gpu_slots_capacity_enabled}i\n" - ) - return data_string - - -def calculate_slots_on_hv( - flavor_name: str, flavor_reqs: Dict, hv_info: Dict -) -> SlottifierEntry: - """ - Helper function that calculates available slots for a flavor on a given hypervisor - :param flavor_name: name of flavor - :param flavor_reqs: dictionary of memory, cpu, and gpu requirements of flavor - :param hv_info: dictionary of memory, cpu, and gpu capacity/availability on hypervisor - and whether hv compute service is enabled - :return: A dataclass holding slottifer information to update with - """ - slots_dataclass = SlottifierEntry() - - slots_available = min( - hv_info["vcpus_available"] // flavor_reqs["cores_required"], - hv_info["mem_available"] // flavor_reqs["mem_required"], - ) - - if "g-" in flavor_name: - # workaround for bugs where gpu number not specified - if flavor_reqs["gpus_required"] == 0: - raise RuntimeError( - f"gpu flavor {flavor_name} does not have 'gpunum' metadata" - ) - - theoretical_gpu_slots_available = min( - hv_info["gpu_capacity"] // flavor_reqs["gpus_required"], - hv_info["vcpus_capacity"] // flavor_reqs["cores_required"], - hv_info["mem_capacity"] // flavor_reqs["mem_required"], - ) - - estimated_slots_used = ( - min( - hv_info["vcpus_capacity"] // flavor_reqs["cores_required"], - hv_info["mem_capacity"] // flavor_reqs["mem_required"], - ) - - slots_available - ) - - # estimated number of GPU slots used - based off of how much cpu/mem is currently being used - # assumes that all VMs on the HV contains only this flavor - which may not be true - # if slots used is greater than gpu slots available we assume all gpus are being used - slots_dataclass.estimated_gpu_slots_used = min( - theoretical_gpu_slots_available, estimated_slots_used - ) - - slots_dataclass.max_gpu_slots_capacity = theoretical_gpu_slots_available - - if hv_info["compute_service_status"] == "enabled": - slots_dataclass.max_gpu_slots_capacity_enabled = ( - theoretical_gpu_slots_available - ) - - slots_available = min( - slots_available, - theoretical_gpu_slots_available - slots_dataclass.estimated_gpu_slots_used, - ) - - if hv_info["compute_service_status"] == "enabled": - slots_dataclass.slots_available = slots_available - return slots_dataclass - - -def get_openstack_resources(instance: str) -> Dict: - """ - This is a helper function that gets information from openstack in one go to calculate flavor slots - This is quicker than getting resources one at a time - It queries the Query Library for all hypervisors within the instance. - :param instance: which cloud to calculate slots for - :return: a dictionary containing 4 entries, key is an openstack component, - value is a list of all components of that - type: compute_services, aggregates, hypervisors and flavors - """ - conn = openstack.connect(cloud=instance) - - # we get all openstack info first because it is quicker than getting them one at a time - # dictionaries prevent duplicates - - all_compute_services = { - service["id"]: service for service in conn.compute.services() - } - all_aggregates = { - aggregate["id"]: aggregate for aggregate in conn.compute.aggregates() - } - - # Querying the query library - hv = HypervisorQuery() - hv.select_all() - hv.run(instance) - hv.group_by("id") - # Flattens the incoming list of dictionaries, into a dictionary of lists - hv_props = hv.to_props(flatten=True) - - all_hypervisors = {} - for hypervisor, hv_info in hv_props.items(): - for k, v in hv_info.items(): - hv_info[k] = v[0] - all_hypervisors[hypervisor] = hv_info - - all_flavors = { - flavor["id"]: flavor for flavor in conn.compute.flavors(get_extra_specs=True) - } - - return { - "compute_services": list(all_compute_services.values()), - "aggregates": list(all_aggregates.values()), - "hypervisors": list(all_hypervisors.values()), - "flavors": list(all_flavors.values()), - } - - -def get_all_hv_info_for_aggregate( - aggregate: Dict, all_compute_services: List, all_hypervisors: List -) -> List: - """ - helper function to get all useful info from hypervisors belonging to a given aggregate - :param aggregate: aggregate that we want to get hvs for - :param all_compute_services: all compute services to validate hvs against - - ensure they have a nova_compute service attached - :param all_hypervisors: all hypervisors to get hv info from - :return: list of dictionaries of hypervisor information for calculating slots - """ - - valid_hvs = [] - for host in aggregate["hosts"]: - host_compute_service = None - for compute_service in all_compute_services: - if compute_service["host"] == host: - host_compute_service = compute_service - - if not host_compute_service: - continue - - hv_obj = None - for hypervisor in all_hypervisors: - if host_compute_service["host"] == hypervisor["hypervisor_name"]: - hv_obj = hypervisor - - if not hv_obj: - continue - - valid_hvs.append(get_hv_info(hv_obj, aggregate, host_compute_service)) - return valid_hvs - - -def update_slots(flavors: List, host_info_list: List, slots_dict: Dict) -> Dict: - """ - update total slots by calculating slots available for a set of flavors on a set of hosts - :param flavors: a list of flavors - :param host_info_list: a list of dictionaries holding info about a hypervisor capacity/availability - :param slots_dict: dictionary of slot info to update - :return: - """ - - for flavor in flavors: - flavor_reqs = get_flavor_requirements(flavor) - for hypervisor in host_info_list: - slots_dict[flavor["name"]] += calculate_slots_on_hv( - flavor["name"], flavor_reqs, hypervisor - ) - return slots_dict - - -def get_slottifier_details(instance: str) -> str: - """ - This function gets calculates slots available for each flavor in openstack and outputs results in - data string format which can be posted to InfluxDB - :param instance: which cloud to calculate slots for - :return: A data string of scraped info - """ - all_openstack_info = get_openstack_resources(instance) - - slots_dict = { - flavor["name"]: SlottifierEntry() for flavor in all_openstack_info["flavors"] - } - for aggregate in all_openstack_info["aggregates"]: - valid_flavors = get_valid_flavors_for_aggregate( - all_openstack_info["flavors"], aggregate - ) - - aggregate_host_info = get_all_hv_info_for_aggregate( - aggregate, - all_openstack_info["compute_services"], - all_openstack_info["hypervisors"], - ) - - slots_dict = update_slots(valid_flavors, aggregate_host_info, slots_dict) - - return convert_to_data_string(instance, slots_dict) - - -def main(user_args: List): - """ - send slottifier info to influx - :param user_args: args passed into script by user - """ - influxdb_args = parse_args(user_args, description="Get All Service Statuses") - run_scrape(influxdb_args, get_slottifier_details) - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/MonitoringTools/usr/local/bin/slottifier_entry.py b/MonitoringTools/usr/local/bin/slottifier_entry.py deleted file mode 100644 index e6ffade5..00000000 --- a/MonitoringTools/usr/local/bin/slottifier_entry.py +++ /dev/null @@ -1,41 +0,0 @@ -from dataclasses import dataclass - - -@dataclass -class SlottifierEntry: - """ - A dataclass to hold slottifier information - :param slots_available: Number of slots available for a flavor - :param estimated_gpu_slots_used: Number of gpu slots currently used that could host this flavor - estimated by amount of cores/mem already used by hvs as there's no way in openstack to find this out directly - :param max_gpu_slots_capacity: Number of gpus available on all compatible hypervisors to build this flavor on - :param max_gpu_slots_capacity_enabled: like max_gpu_slots_capacity, but only counting hosts with nova-compute - service enabled - """ - - slots_available: int = 0 - estimated_gpu_slots_used: int = 0 - max_gpu_slots_capacity: int = 0 - max_gpu_slots_capacity_enabled: int = 0 - - def __add__(self, other): - """ - dunder method to add two SlottifierEntry values together. - :param other: Another SlottifierEntry dataclass to add - :return: A SlottifierEntry dataclass where each attribute value from current dataclass and given dataclass are - added together - """ - if not isinstance(other, SlottifierEntry): - raise TypeError( - f"Unsupported operand type for +: '{type(self)}' and '{type(other)}'" - ) - - return SlottifierEntry( - slots_available=self.slots_available + other.slots_available, - estimated_gpu_slots_used=self.estimated_gpu_slots_used - + other.estimated_gpu_slots_used, - max_gpu_slots_capacity=self.max_gpu_slots_capacity - + other.max_gpu_slots_capacity, - max_gpu_slots_capacity_enabled=self.max_gpu_slots_capacity_enabled - + other.max_gpu_slots_capacity_enabled, - )