diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py
index 8b9d22b84..2eba36db2 100644
--- a/crmsh/bootstrap.py
+++ b/crmsh/bootstrap.py
@@ -275,7 +275,7 @@ def _validate_nodes_option(self):
             utils.fatal(f"Overriding current user '{self.current_user}' by '{user}'. Ouch, don't do it.")
         self.user_at_node_list = [value for (user, node), value in zip(li, self.user_at_node_list) if node != me]
         for user, node in (utils.parse_user_at_host(x) for x in self.user_at_node_list):
-            utils.node_reachable_check(node)
+            utils.ssh_reachable_check(node)

     def _validate_cluster_node(self):
         """
@@ -2449,7 +2449,7 @@ def bootstrap_join(context):
     _context.initialize_user()

     remote_user, cluster_node = _parse_user_at_host(_context.cluster_node, _context.current_user)
-    utils.node_reachable_check(cluster_node)
+    utils.ssh_reachable_check(cluster_node)
     join_ssh(cluster_node, remote_user)

     remote_user = utils.user_of(cluster_node)
diff --git a/crmsh/qdevice.py b/crmsh/qdevice.py
index caa390518..05df531b6 100644
--- a/crmsh/qdevice.py
+++ b/crmsh/qdevice.py
@@ -208,14 +208,11 @@ def check_qnetd_addr(qnetd_addr):
         except socket.error:
             raise ValueError("host \"{}\" is unreachable".format(qnetd_addr))

-        utils.node_reachable_check(qnetd_addr)
+        utils.ssh_reachable_check(qnetd_addr)

         if utils.InterfacesInfo.ip_in_local(qnetd_ip):
             raise ValueError("host for qnetd must be a remote one")

-        if not utils.check_port_open(qnetd_ip, 22):
-            raise ValueError("ssh service on \"{}\" not available".format(qnetd_addr))
-
     @staticmethod
     def check_qdevice_port(qdevice_port):
         if not utils.valid_port(qdevice_port):
diff --git a/crmsh/ui_cluster.py b/crmsh/ui_cluster.py
index b975dbc6d..daeaaf775 100644
--- a/crmsh/ui_cluster.py
+++ b/crmsh/ui_cluster.py
@@ -26,7 +26,7 @@
 from .prun import prun
 from .service_manager import ServiceManager
 from .sh import ShellUtils
-from .ui_node import parse_option_for_nodes
+from . import ui_utils
 from . import constants


@@ -167,7 +167,13 @@ def do_start(self, context, *args):
         '''
         Starts the cluster stack on all nodes or specific node(s)
         '''
-        node_list = parse_option_for_nodes(context, *args)
+        try:
+            node_list = ui_utils.parse_and_validate_node_args("start", *args)
+        except utils.NoSSHError as msg:
+            logger.error('%s', msg)
+            logger.info("Please try 'crm cluster start' on each node")
+            return False
+
         service_check_list = ["pacemaker.service"]
         start_qdevice = False
         if corosync.is_qdevice_configured():
@@ -175,15 +181,10 @@ def do_start(self, context, *args):
             service_check_list.append("corosync-qdevice.service")

         service_manager = ServiceManager()
-        try:
-            for node in node_list[:]:
-                if all([service_manager.service_is_active(srv, remote_addr=node) for srv in service_check_list]):
-                    logger.info("The cluster stack already started on {}".format(node))
-                    node_list.remove(node)
-        except utils.NoSSHError as msg:
-            logger.error('%s', msg)
-            logger.info("Please try 'crm cluster start' on each node")
-            return False
+        for node in node_list[:]:
+            if all([service_manager.service_is_active(srv, remote_addr=node) for srv in service_check_list]):
+                logger.info("The cluster stack already started on {}".format(node))
+                node_list.remove(node)

         if not node_list:
             return
@@ -248,13 +249,14 @@ def do_stop(self, context, *args):
         '''
         Stops the cluster stack on all nodes or specific node(s)
         '''
-        node_list = parse_option_for_nodes(context, *args)
         try:
-            node_list = [n for n in node_list if self._node_ready_to_stop_cluster_service(n)]
+            node_list = ui_utils.parse_and_validate_node_args("stop", *args)
         except utils.NoSSHError as msg:
             logger.error('%s', msg)
             logger.info("Please try 'crm cluster stop' on each node")
             return False
+
+        node_list = [n for n in node_list if self._node_ready_to_stop_cluster_service(n)]
         if not node_list:
             return
         logger.debug(f"stop node list: {node_list}")
@@ -297,7 +299,7 @@ def do_enable(self, context, *args):
         '''
         Enable the cluster services on this node
         '''
-        node_list = parse_option_for_nodes(context, *args)
+        node_list = ui_utils.parse_and_validate_node_args("enable", *args)
         service_manager = ServiceManager()
         node_list = service_manager.enable_service("pacemaker.service", node_list=node_list)
         if service_manager.service_is_available("corosync-qdevice.service") and corosync.is_qdevice_configured():
@@ -310,7 +312,7 @@ def do_disable(self, context, *args):
         '''
         Disable the cluster services on this node
         '''
-        node_list = parse_option_for_nodes(context, *args)
+        node_list = ui_utils.parse_and_validate_node_args("disable", *args)
         service_manager = ServiceManager()
         node_list = service_manager.disable_service("pacemaker.service", node_list=node_list)
         service_manager.disable_service("corosync-qdevice.service", node_list=node_list)
diff --git a/crmsh/ui_node.py b/crmsh/ui_node.py
index 57e07b4e3..3c32396c7 100644
--- a/crmsh/ui_node.py
+++ b/crmsh/ui_node.py
@@ -6,7 +6,6 @@
 import copy
 import subprocess
 from lxml import etree
-from argparse import ArgumentParser, RawDescriptionHelpFormatter

 from . import config
 from . import command
@@ -219,47 +218,6 @@ def print_node(uname, ident, node_type, other, inst_attr, offline):
         print(term.render("\t%s" % (s)))


-def parse_option_for_nodes(context, *args):
-    """
-    Parse option for nodes
-    Return a node list
-    """
-    action_type = context.get_command_name()
-    action_target = "node" if action_type in ["standby", "online"] else "cluster service"
-    action = "{} {}".format(action_type, action_target)
-    usage_template = """
-Specify node(s) on which to {action}.
-If no nodes are specified, {action} on the local node.
-If --all is specified, {action} on all nodes."""
-    addtion_usage = ""
-    if action_type == "standby":
-        usage_template += """
-\n\nAdditionally, you may specify a lifetime for the standby---if set to
-"reboot", the node will be back online once it reboots. "forever" will
-keep the node in standby after reboot. The life time defaults to
-"forever"."""
-        addtion_usage = " [lifetime]"
-
-    parser = ArgumentParser(description=usage_template.format(action=action),
-                            usage="{} [--all | <node>... ]{}".format(action_type, addtion_usage),
-                            add_help=False,
-                            formatter_class=RawDescriptionHelpFormatter)
-    parser.add_argument("-h", "--help", action="store_true", dest="help", help="Show this help message")
-    parser.add_argument("--all", help="To {} on all nodes".format(action), action="store_true", dest="all")
-
-    options, args = parser.parse_known_args(args)
-    if options.help:
-        parser.print_help()
-        raise utils.TerminateSubCommand(success=True)
-    if options is None or args is None:
-        raise utils.TerminateSubCommand
-    if options.all and args:
-        context.fatal_error("Should either use --all or specific node(s)")
-
-    include_remote = action_type in ["standby", "online"]
-    return utils.validate_and_get_reachable_nodes(args, options.all, include_remote)
-
-
 class NodeMgmt(command.UI):
     '''
     Nodes management class
@@ -348,7 +306,7 @@ def do_standby(self, context, *args):
             args = args[:-1]

         # Parse node option
-        node_list = parse_option_for_nodes(context, *args)
+        node_list = ui_utils.parse_and_validate_node_args("standby", *args)
         if not node_list:
             return

@@ -436,7 +394,7 @@ def do_online(self, context, *args):
         To avoid race condition for --all option, melt all online values into one cib replace session
         """
         # Parse node option
-        node_list = parse_option_for_nodes(context, *args)
+        node_list = ui_utils.parse_and_validate_node_args("online", *args)
         if not node_list:
             return

diff --git a/crmsh/ui_utils.py b/crmsh/ui_utils.py
index 9cdcfaffd..d8d128c9c 100644
--- a/crmsh/ui_utils.py
+++ b/crmsh/ui_utils.py
@@ -6,6 +6,7 @@
 import inspect
 from . import utils
 from . import log
+from argparse import ArgumentParser, RawDescriptionHelpFormatter


 logger = log.setup_logger(__name__)
@@ -162,3 +163,45 @@ def mknamed():
     if max_args >= 0 and len(args) > max_args:
         raise ValueError("Expected (%s), takes at most %d arguments (%d given)" %
                          (mknamed(), max_args-nskip, len(args)-nskip))
+
+
+def parse_and_validate_node_args(command_name, *args) -> list:
+    '''
+    Parse options for node-related commands,
+    then validate and return the list of reachable nodes
+    '''
+    action_target = "node" if command_name in ["standby", "online"] else "cluster service"
+    action = f"{command_name} {action_target}"
+    usage_template = """
+Specify node(s) on which to {action}.
+If no nodes are specified, {action} on the local node.
+If --all is specified, {action} on all nodes."""
+    addition_usage = ""
+    if command_name == "standby":
+        usage_template += """
+\n\nAdditionally, you may specify a lifetime for the standby---if set to
+"reboot", the node will be back online once it reboots. "forever" will
+keep the node in standby after reboot. The lifetime defaults to
+"forever"."""
+        addition_usage = " [lifetime]"
+
+    parser = ArgumentParser(
+        description=usage_template.format(action=action),
+        usage=f"{command_name} [--all | <node>... ]{addition_usage}",
+        add_help=False,
+        formatter_class=RawDescriptionHelpFormatter
+    )
+    parser.add_argument("-h", "--help", action="store_true", dest="help", help="Show this help message")
+    parser.add_argument("--all", help=f"To {action} on all nodes", action="store_true", dest="all")
+
+    options, args = parser.parse_known_args(args)
+    if options.help:
+        parser.print_help()
+        raise utils.TerminateSubCommand(success=True)
+    if options is None or args is None:
+        raise utils.TerminateSubCommand
+    if options.all and args:
+        raise ValueError("Should either use --all or specific node(s)")
+
+    include_remote = command_name in ["standby", "online"]
+    return utils.validate_and_get_reachable_nodes(args, options.all, include_remote)
diff --git a/crmsh/utils.py b/crmsh/utils.py
index 621390c0d..4696f57ac 100644
--- a/crmsh/utils.py
+++ b/crmsh/utils.py
@@ -2405,25 +2405,23 @@ def package_is_installed(pkg, remote_addr=None):
     return rc == 0


-def node_reachable_check(node, ping_count=1, port=22, timeout=3):
+def ssh_reachable_check(node):
     """
-    Check if node is reachable by using ping and socket to ssh port
+    Check if node is reachable by checking whether its SSH port is open
     """
-    rc, _, _ = ShellUtils().get_stdout_stderr(f"ping -n -c {ping_count} -W {timeout} {node}")
-    if rc == 0:
-        return True
-    # ping failed, try to connect to ssh port by socket
-    if check_port_open(node, port, timeout):
+    if node == this_node() or check_port_open(node, 22):
         return True
-    # both ping and socket failed
-    raise ValueError(f"host \"{node}\" is unreachable")
+    if config.core.no_ssh:
+        raise NoSSHError(constants.NO_SSH_ERROR_MSG)
+    else:
+        raise ValueError(f"host \"{node}\" is unreachable via SSH")


 def get_reachable_node_list(node_list:list[str]) -> list[str]:
     reachable_node_list = []
     for node in node_list:
         try:
-            if node == this_node() or node_reachable_check(node):
+            if ssh_reachable_check(node):
                 reachable_node_list.append(node)
         except ValueError as e:
             logger.warning(str(e))
@@ -2451,6 +2449,12 @@ def __init__(self, msg: str, dead_nodes=None):
         self.dead_nodes = dead_nodes or []


+class UnreachableNodeError(ValueError):
+    def __init__(self, msg: str, unreachable_nodes=None):
+        super().__init__(msg)
+        self.unreachable_nodes = unreachable_nodes or []
+
+
 def check_all_nodes_reachable(action_to_do: str, peer_node: str = None):
     """
     Check if all cluster nodes are reachable
@@ -2461,7 +2465,7 @@ def check_all_nodes_reachable(action_to_do: str, peer_node: str = None):
     dead_nodes = []
     for node in offline_nodes:
         try:
-            node_reachable_check(node)
+            ssh_reachable_check(node)
         except ValueError:
             dead_nodes.append(node)
     if dead_nodes:
@@ -2472,8 +2476,17 @@ def check_all_nodes_reachable(action_to_do: str, peer_node: str = None):
         """
         raise DeadNodeError(msg, dead_nodes)

+    unreachable_nodes = []
     for node in online_nodes:
-        node_reachable_check(node)
+        try:
+            ssh_reachable_check(node)
+        except ValueError:
+            unreachable_nodes.append(node)
+    if unreachable_nodes:
+        msg = f"""There are unreachable nodes: {', '.join(unreachable_nodes)}.
+Please check the network connectivity before {action_to_do}.
+        """
+        raise UnreachableNodeError(msg, unreachable_nodes)


 def re_split_string(reg, string):
diff --git a/test/features/cluster_blocking_ssh.feature b/test/features/cluster_blocking_ssh.feature
index c2385e34b..b4717f146 100644
--- a/test/features/cluster_blocking_ssh.feature
+++ b/test/features/cluster_blocking_ssh.feature
@@ -60,8 +60,6 @@ Feature: cluster testing with ssh blocked
     And Run "firewall-cmd --zone=public --add-rich-rule='rule port port=22 protocol=tcp drop' --permanent && firewall-cmd --reload" on "hanode2"
     And Try "ssh -o ConnectTimeout=5 hanode2" on "hanode1"
     Then Except "ssh: connect to host hanode2 port 22: Connection timed out" in stderr
-    When Run "timeout 5s crm report || echo "timeout"" on "hanode1"
-    Then Expected "timeout" in stdout
     When Write multi lines to file "/etc/crm/crm.conf" on "hanode1"
       """
       [core]
diff --git a/test/features/qdevice_validate.feature b/test/features/qdevice_validate.feature
index 1d38ea728..729b40164 100644
--- a/test/features/qdevice_validate.feature
+++ b/test/features/qdevice_validate.feature
@@ -23,7 +23,7 @@ Feature: corosync qdevice/qnetd options validate
   Scenario: Service ssh on qnetd node not available
     When Run "systemctl stop sshd.service" on "node-without-ssh"
     When Try "crm cluster init --qnetd-hostname=node-without-ssh"
-    Then Except "ERROR: cluster.init: ssh service on "node-without-ssh" not available"
+    Then Except "ERROR: cluster.init: host "node-without-ssh" is unreachable via SSH"

   @clean
   Scenario: Option "--qdevice-port" set wrong port
diff --git a/test/unittests/test_qdevice.py b/test/unittests/test_qdevice.py
index 70e56564d..39730a790 100644
--- a/test/unittests/test_qdevice.py
+++ b/test/unittests/test_qdevice.py
@@ -203,7 +203,7 @@ def test_qdevice_p12_on_cluster(self):
         self.assertEqual(res, "/etc/corosync/qdevice/net/node1.com/qdevice-net-node.p12")

     @mock.patch('crmsh.utils.InterfacesInfo.ip_in_local')
-    @mock.patch('crmsh.utils.node_reachable_check')
+    @mock.patch('crmsh.utils.ssh_reachable_check')
     @mock.patch('socket.getaddrinfo')
     def test_check_qnetd_addr_local(self, mock_getaddrinfo, mock_reachable, mock_in_local):
         mock_getaddrinfo.return_value = [(None, ("10.10.10.123",)),]
diff --git a/test/unittests/test_ui_cluster.py b/test/unittests/test_ui_cluster.py
index 5bebd98f8..a1af37dbb 100644
--- a/test/unittests/test_ui_cluster.py
+++ b/test/unittests/test_ui_cluster.py
@@ -38,7 +38,7 @@ def tearDownClass(cls):

     @mock.patch('logging.Logger.info')
     @mock.patch('crmsh.service_manager.ServiceManager.service_is_active')
-    @mock.patch('crmsh.ui_cluster.parse_option_for_nodes')
+    @mock.patch('crmsh.ui_utils.parse_and_validate_node_args')
     @mock.patch('crmsh.corosync.is_qdevice_configured')
     def test_do_start_already_started(self, mock_qdevice_configured, mock_parse_nodes, mock_active, mock_info):
         mock_qdevice_configured.return_value = False
@@ -46,7 +46,7 @@ def test_do_start_already_started(self, mock_qdevice_configured, mock_parse_nodes, mock_active, mock_info):
         mock_parse_nodes.return_value = ["node1", "node2"]
         mock_active.side_effect = [True, True]
         self.ui_cluster_inst.do_start(context_inst, "node1", "node2")
-        mock_parse_nodes.assert_called_once_with(context_inst, "node1", "node2")
+        mock_parse_nodes.assert_called_once_with("start", "node1", "node2")
         mock_active.assert_has_calls([
             mock.call("pacemaker.service", remote_addr="node1"),
             mock.call("pacemaker.service", remote_addr="node2")
@@ -63,7 +63,7 @@ def test_do_start_already_started(self, mock_qdevice_configured, mock_parse_nodes, mock_active, mock_info):
     @mock.patch('crmsh.corosync.is_qdevice_configured')
     @mock.patch('crmsh.service_manager.ServiceManager.start_service')
     @mock.patch('crmsh.service_manager.ServiceManager.service_is_active')
-    @mock.patch('crmsh.ui_cluster.parse_option_for_nodes')
+    @mock.patch('crmsh.ui_utils.parse_and_validate_node_args')
     def test_do_start(self, mock_parse_nodes, mock_active, mock_start, mock_qdevice_configured, mock_info, mock_error, mock_start_pacemaker, mock_check_qdevice):
         context_inst = mock.Mock()
         mock_start_pacemaker.return_value = ["node1"]
@@ -86,7 +86,7 @@ def test_do_start(self, mock_parse_nodes, mock_active, mock_start, mock_qdevice_configured, mock_info, mock_error, mock_start_pacemaker, mock_check_qdevice):

     @mock.patch('crmsh.utils.wait_for_dc')
     @mock.patch('crmsh.ui_cluster.Cluster._node_ready_to_stop_cluster_service')
-    @mock.patch('crmsh.ui_cluster.parse_option_for_nodes')
+    @mock.patch('crmsh.ui_utils.parse_and_validate_node_args')
     def test_do_stop_return(self, mock_parse_nodes, mock_node_ready_to_stop_cluster_service, mock_dc):
         mock_parse_nodes.return_value = ["node1", "node2"]
         mock_node_ready_to_stop_cluster_service.side_effect = [False, False]
@@ -94,7 +94,7 @@ def test_do_stop_return(self, mock_parse_nodes, mock_node_ready_to_stop_cluster_service, mock_dc):
         context_inst = mock.Mock()
         self.ui_cluster_inst.do_stop(context_inst, "node1", "node2")

-        mock_parse_nodes.assert_called_once_with(context_inst, "node1", "node2")
+        mock_parse_nodes.assert_called_once_with("stop", "node1", "node2")
         mock_node_ready_to_stop_cluster_service.assert_has_calls([mock.call("node1"), mock.call("node2")])
         mock_dc.assert_not_called()

@@ -104,7 +104,7 @@ def test_do_stop_return(self, mock_parse_nodes, mock_node_ready_to_stop_cluster_service, mock_dc):
     @mock.patch('crmsh.ui_cluster.Cluster._set_dlm')
     @mock.patch('crmsh.utils.wait_for_dc')
     @mock.patch('crmsh.ui_cluster.Cluster._node_ready_to_stop_cluster_service')
-    @mock.patch('crmsh.ui_cluster.parse_option_for_nodes')
+    @mock.patch('crmsh.ui_utils.parse_and_validate_node_args')
     def test_do_stop(self, mock_parse_nodes, mock_node_ready_to_stop_cluster_service, mock_dc, mock_set_dlm,
                      mock_service_manager, mock_info, mock_debug):
         mock_parse_nodes.return_value = ["node1", "node2"]
@@ -117,7 +117,7 @@ def test_do_stop(self, mock_parse_nodes, mock_node_ready_to_stop_cluster_service, mock_dc, mock_set_dlm,
                      mock_service_manager, mock_info, mock_debug):
         context_inst = mock.Mock()
         self.ui_cluster_inst.do_stop(context_inst, "node1", "node2")
-        mock_parse_nodes.assert_called_once_with(context_inst, "node1", "node2")
+        mock_parse_nodes.assert_called_once_with("stop", "node1", "node2")
         mock_node_ready_to_stop_cluster_service.assert_has_calls([mock.call("node1"), mock.call("node2")])
         mock_debug.assert_called_once_with("stop node list: ['node1']")
         mock_dc.assert_called_once_with("node1")
diff --git a/test/unittests/test_utils.py b/test/unittests/test_utils.py
index 4d75320fc..3e231626d 100644
--- a/test/unittests/test_utils.py
+++ b/test/unittests/test_utils.py
@@ -982,7 +982,7 @@ def test_is_block_device(mock_stat, mock_isblk):
     mock_isblk.assert_called_once_with(12345)


-@mock.patch('crmsh.utils.node_reachable_check')
+@mock.patch('crmsh.utils.ssh_reachable_check')
 @mock.patch('crmsh.xmlutil.CrmMonXmlParser')
 def test_check_all_nodes_reachable_dead_nodes(mock_xml, mock_reachable):
     mock_xml_inst = mock.Mock()
@@ -995,7 +995,7 @@ def test_check_all_nodes_reachable_dead_nodes(mock_xml, mock_reachable):
     assert err.value.dead_nodes == ["node2"]


-@mock.patch('crmsh.utils.node_reachable_check')
+@mock.patch('crmsh.utils.ssh_reachable_check')
 @mock.patch('crmsh.xmlutil.CrmMonXmlParser')
 def test_check_all_nodes_reachable(mock_xml, mock_reachable):
     mock_xml_inst = mock.Mock()
@@ -1410,7 +1410,7 @@ def test_is_dc_idle(mock_dc, mock_shell):


 @mock.patch('logging.Logger.warning')
-@mock.patch('crmsh.utils.node_reachable_check')
+@mock.patch('crmsh.utils.ssh_reachable_check')
 def test_get_reachable_node_list(mock_reachable, mock_warn):
     mock_reachable.side_effect = [False, True, ValueError("error for node3")]
     assert utils.get_reachable_node_list(["node1", "node2", "node3"]) == ["node2"]
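
Note on the new error-handling contract (illustrative, not part of the patch): after this change, ssh_reachable_check() either returns True or raises, NoSSHError when core.no_ssh is set and ValueError otherwise, so callers branch on the exception type instead of a boolean return. Below is a minimal sketch of a caller under that contract, using only names introduced in the hunks above; the wrapper function nodes_ready_for_stop() and its printed messages are hypothetical:

    from crmsh import utils

    def nodes_ready_for_stop(nodes: list[str]) -> list[str]:
        # Keep only SSH-reachable nodes; get_reachable_node_list() logs a
        # warning for each node whose check raises and drops it from the result.
        reachable = utils.get_reachable_node_list(nodes)
        try:
            # Raises DeadNodeError for offline nodes that fail the SSH check,
            # UnreachableNodeError for online nodes whose SSH port is closed.
            utils.check_all_nodes_reachable("stopping the cluster")
        except utils.DeadNodeError as e:
            # Hypothetical policy: proceed without the dead nodes.
            print(f"Ignoring dead nodes: {e.dead_nodes}")
        except utils.UnreachableNodeError as e:
            print(f"Cannot proceed, unreachable nodes: {e.unreachable_nodes}")
            return []
        return reachable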