Skip to content

Commit 22b19ea

Browse files
committed
Dev: ui_sbd: Refactor for the process of purging crashdump
1 parent 0c3bd0a commit 22b19ea

File tree

2 files changed

+101
-63
lines changed

2 files changed

+101
-63
lines changed

crmsh/sbd.py

Lines changed: 60 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
from . import corosync
1010
from . import xmlutil
1111
from . import watchdog
12+
from . import cibquery
1213
from .service_manager import ServiceManager
1314
from .sh import ShellUtils
1415

@@ -489,6 +490,7 @@ class SBDManager:
489490
SBD_RA = "stonith:fence_sbd"
490491
SBD_RA_ID = "stonith-sbd"
491492
SBD_DEVICE_MAX = 3
493+
SBD_CRASHDUMP_ACTION = "flush,crashdump"
492494

493495
class NotConfigSBD(Exception):
494496
pass
@@ -499,7 +501,8 @@ def __init__(
499501
timeout_dict: typing.Dict[str, int] | None = None,
500502
update_dict: typing.Dict[str, str] | None = None,
501503
diskless_sbd: bool = False,
502-
bootstrap_context: 'bootstrap.Context | None' = None
504+
bootstrap_context: 'bootstrap.Context | None' = None,
505+
crashdump: str | None = None
503506
):
504507
'''
505508
Init function which can be called from crm sbd subcommand or bootstrap
@@ -511,6 +514,7 @@ def __init__(
511514
self.cluster_is_running = ServiceManager().service_is_active(constants.PCMK_SERVICE)
512515
self.bootstrap_context = bootstrap_context
513516
self.overwrite_sysconfig = False
517+
self.crashdump = crashdump
514518

515519
# From bootstrap init or join process, override the values
516520
if self.bootstrap_context:
@@ -603,6 +607,7 @@ def configure_sbd(self):
603607
if not xmlutil.CrmMonXmlParser().is_resource_configured(self.SBD_RA):
604608
cmd = f"crm configure primitive {self.SBD_RA_ID} {self.SBD_RA}"
605609
sh.cluster_shell().get_stdout_or_raise_error(cmd)
610+
self.set_crashdump_option_in_fence_sbd()
606611
else:
607612
swt_value = self.timeout_dict.get("stonith-watchdog", 2*SBDTimeout.get_sbd_watchdog_timeout())
608613
utils.set_property("stonith-watchdog-timeout", swt_value)
@@ -734,6 +739,7 @@ def init_and_deploy_sbd(self, restart_first=False):
734739
return
735740

736741
self.initialize_sbd()
742+
self.set_crashdump_action()
737743
self.update_configuration()
738744
self.enable_sbd_service()
739745

@@ -787,6 +793,59 @@ def join_sbd(self, remote_user, peer_host):
787793
logger.info("Got {}SBD configuration".format("" if dev_list else "diskless "))
788794
self.enable_sbd_service()
789795

796+
def set_crashdump_action(self):
    '''
    Switch SBD_TIMEOUT_ACTION in the SBD sysconfig file to or from the
    crashdump action (SBD_CRASHDUMP_ACTION).

    Nothing happens unless self.crashdump is "set" or "restore".

    "set":
      - no SBD_TIMEOUT_ACTION configured: queue the crashdump action in
        self.update_dict (presumably written out later by the caller's
        configuration update step -- confirm against the caller)
      - a different value configured: prefix the existing line with the
        "#__sbd_crashdump_backup__ " marker and append a new line carrying
        the crashdump action right after it
    "restore":
      - only when the configured value is the crashdump action: delete the
        active SBD_TIMEOUT_ACTION line and strip the marker from the
        backed-up line
    '''
    mode = self.crashdump
    if mode not in ("set", "restore"):
        return

    sysconfig = self.SYSCONFIG_SBD
    crashdump_action = self.SBD_CRASHDUMP_ACTION

    # sed one-liners operating in place on the sysconfig file
    backup_current_line = f"sed -i '/^SBD_TIMEOUT_ACTION/s/^/#__sbd_crashdump_backup__ /' {sysconfig}"
    append_crashdump_line = f"sed -i '/^#__sbd_crashdump_backup__/a SBD_TIMEOUT_ACTION={crashdump_action}' {sysconfig}"
    uncomment_backup_line = f"sed -i 's/^#__sbd_crashdump_backup__ SBD_TIMEOUT_ACTION/SBD_TIMEOUT_ACTION/' {sysconfig}"
    drop_active_line = f"sed -i '/^SBD_TIMEOUT_ACTION/d' {sysconfig}"

    current_action = SBDUtils.get_sbd_value_from_config("SBD_TIMEOUT_ACTION")
    shell = sh.cluster_shell()

    if mode == "set":
        if not current_action:
            logger.info("Set SBD_TIMEOUT_ACTION in %s: %s", sysconfig, crashdump_action)
            self.update_dict["SBD_TIMEOUT_ACTION"] = crashdump_action
        elif current_action != crashdump_action:
            logger.info("Update SBD_TIMEOUT_ACTION in %s: %s", sysconfig, crashdump_action)
            shell.get_stdout_or_raise_error(f"{backup_current_line} && {append_crashdump_line}")
        return

    # mode == "restore": only undo what the "set" path put in place
    if not current_action or current_action != crashdump_action:
        return
    logger.info("Restore SBD_TIMEOUT_ACTION in %s", sysconfig)
    shell.get_stdout_or_raise_error(f"{drop_active_line} && {uncomment_backup_line}")
821+
822+
def set_crashdump_option_in_fence_sbd(self):
    '''
    Set or delete the "crashdump" parameter on every fence_sbd primitive.

    Nothing happens unless self.crashdump is "set" or "restore". The
    configuration is read once via 'crm configure show xml'; then for each
    stonith:fence_sbd primitive found:
      - "set":     if its crashdump parameter evaluates as boolean false,
                   set it to 1
      - "restore": if its crashdump parameter does not evaluate as boolean
                   false, delete the parameter
    '''
    mode = self.crashdump
    if mode not in ("set", "restore"):
        return

    shell = sh.cluster_shell()
    cib = xmlutil.text2elem(shell.get_stdout_or_raise_error('crm configure show xml'))
    fence_sbd_ra = cibquery.ResourceAgent("stonith", "", "fence_sbd")

    # NOTE(review): the set/restore branches are treated as mutually
    # exclusive on the current parameter value -- confirm the nesting.
    for res in cibquery.get_primitives_with_ra(cib, fence_sbd_ra) or ():
        crashdump_off = utils.is_boolean_false(cibquery.get_parameter_value(cib, res, "crashdump"))
        if crashdump_off and mode == "set":
            logger.info("Set crashdump option for fence_sbd resource '%s'", res)
            cmd = f"crm resource param {res} set crashdump 1"
        elif not crashdump_off and mode == "restore":
            logger.info("Delete crashdump option for fence_sbd resource '%s'", res)
            cmd = f"crm resource param {res} delete crashdump"
        else:
            # already in the requested state
            continue
        shell.get_stdout_or_raise_error(cmd)
848+
790849

791850
def cleanup_existing_sbd_resource():
792851
if xmlutil.CrmMonXmlParser().is_resource_configured(SBDManager.SBD_RA):

crmsh/ui_sbd.py

Lines changed: 41 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@
1212
from crmsh import sh
1313
from crmsh import xmlutil
1414
from crmsh import constants
15-
from crmsh import cibquery
1615
from crmsh.service_manager import ServiceManager
1716

1817

@@ -313,60 +312,16 @@ def _adjust_timeout_dict(timeout_dict: dict) -> dict:
313312
return timeout_dict
314313
return timeout_dict
315314

316-
def _set_crashdump_option(self, delete=False):
317-
'''
318-
Set crashdump option for fence_sbd resource
319-
'''
320-
cib = xmlutil.text2elem(self.cluster_shell.get_stdout_or_raise_error('crm configure show xml'))
321-
ra = cibquery.ResourceAgent("stonith", "", "fence_sbd")
322-
res_id_list = cibquery.get_primitives_with_ra(cib, ra)
323-
if not res_id_list:
324-
if delete:
325-
return
326-
logger.error("No fence_sbd resource found")
327-
raise utils.TerminateSubCommand
328-
329-
crashdump_value = cibquery.get_parameter_value(cib, res_id_list[0], "crashdump")
330-
cmd = ""
331-
if utils.is_boolean_false(crashdump_value):
332-
if delete:
333-
return
334-
cmd = f"crm resource param {res_id_list[0]} set crashdump 1"
335-
logger.info("Set crashdump option for fence_sbd resource")
336-
elif delete:
337-
cmd = f"crm resource param {res_id_list[0]} delete crashdump"
338-
logger.info("Delete crashdump option for fence_sbd resource")
339-
if cmd:
340-
self.cluster_shell.get_stdout_or_raise_error(cmd)
341-
342-
def _set_crashdump_in_sysconfig(self, crashdump_watchdog_timeout=None, restore=False, diskless=False) -> dict:
315+
def _set_sbd_opts(self, crashdump_watchdog_timeout=None, restore=False, diskless=False) -> dict:
343316
update_dict = {}
344-
sbd_timeout_action_for_crashdump = "flush,crashdump"
345-
comment_action_line = f"sed -i '/^SBD_TIMEOUT_ACTION/s/^/#__sbd_crashdump_backup__ /' {sbd.SBDManager.SYSCONFIG_SBD}"
346-
add_action_line = f"sed -i '/^#__sbd_crashdump_backup__/a SBD_TIMEOUT_ACTION={sbd_timeout_action_for_crashdump}' {sbd.SBDManager.SYSCONFIG_SBD}"
347-
comment_out_action_line = f"sed -i 's/^#__sbd_crashdump_backup__ SBD_TIMEOUT_ACTION/SBD_TIMEOUT_ACTION/' {sbd.SBDManager.SYSCONFIG_SBD}"
348-
delete_action_line = f"sed -i '/^SBD_TIMEOUT_ACTION/d' {sbd.SBDManager.SYSCONFIG_SBD}"
349317

350-
sbd_timeout_action_configured = sbd.SBDUtils.get_sbd_value_from_config("SBD_TIMEOUT_ACTION")
351318
if restore:
352-
if sbd_timeout_action_configured and sbd_timeout_action_configured == sbd_timeout_action_for_crashdump:
353-
cmd_delete_and_comment_out = f"{delete_action_line} && {comment_out_action_line}"
354-
logger.info("Delete SBD_TIMEOUT_ACTION: %s and restore original value", sbd_timeout_action_for_crashdump)
355-
self.cluster_shell.get_stdout_or_raise_error(cmd_delete_and_comment_out)
356-
357319
sbd_opts = sbd.SBDUtils.get_sbd_value_from_config("SBD_OPTS")
358320
if sbd_opts and re.search(self.SBD_OPTS_RE, sbd_opts):
359321
sbd_opts = re.sub(self.SBD_OPTS_RE, '', sbd_opts)
360322
update_dict["SBD_OPTS"] = ' '.join(sbd_opts.split())
361323

362324
elif crashdump_watchdog_timeout:
363-
if not sbd_timeout_action_configured:
364-
update_dict["SBD_TIMEOUT_ACTION"] = sbd_timeout_action_for_crashdump
365-
elif sbd_timeout_action_configured != sbd_timeout_action_for_crashdump:
366-
cmd_comment_and_add = f"{comment_action_line} && {add_action_line}"
367-
self.cluster_shell.get_stdout_or_raise_error(cmd_comment_and_add)
368-
logger.info("Update SBD_TIMEOUT_ACTION in %s: %s", sbd.SBDManager.SYSCONFIG_SBD, sbd_timeout_action_for_crashdump)
369-
370325
value_for_diskless = " -Z" if diskless else ""
371326
value_for_sbd_opts = f"-C {crashdump_watchdog_timeout}{value_for_diskless}"
372327
sbd_opts = sbd.SBDUtils.get_sbd_value_from_config("SBD_OPTS")
@@ -421,13 +376,14 @@ def _configure_diskbase(self, parameter_dict: dict):
421376
# merge runtime timeout dict into parameter timeout dict without overwriting
422377
timeout_dict = {**self.device_meta_dict_runtime, **timeout_dict}
423378

379+
configure_crashdump = False
424380
crashdump_watchdog_timeout = parameter_dict.get("crashdump-watchdog", self.crashdump_watchdog_timeout_from_config)
425381
if self._should_configure_crashdump(crashdump_watchdog_timeout, timeout_dict.get("watchdog")):
382+
configure_crashdump = True
426383
self._check_kdump_service()
427-
self._set_crashdump_option()
428384
timeout_dict["msgwait"] = 2*timeout_dict["watchdog"] + crashdump_watchdog_timeout
429385
logger.info("Set msgwait-timeout to 2*watchdog-timeout + crashdump-watchdog-timeout: %s", timeout_dict["msgwait"])
430-
result_dict = self._set_crashdump_in_sysconfig(crashdump_watchdog_timeout)
386+
result_dict = self._set_sbd_opts(crashdump_watchdog_timeout)
431387
update_dict = {**update_dict, **result_dict}
432388

433389
if timeout_dict == self.device_meta_dict_runtime and not update_dict:
@@ -437,7 +393,8 @@ def _configure_diskbase(self, parameter_dict: dict):
437393
sbd_manager = sbd.SBDManager(
438394
device_list_to_init=self.device_list_from_config,
439395
timeout_dict=timeout_dict,
440-
update_dict=update_dict
396+
update_dict=update_dict,
397+
crashdump="set" if configure_crashdump else None
441398
)
442399
sbd_manager.init_and_deploy_sbd()
443400

@@ -455,10 +412,12 @@ def _configure_diskless(self, parameter_dict: dict):
455412
if watchdog_device != self.watchdog_device_from_config:
456413
update_dict["SBD_WATCHDOG_DEV"] = watchdog_device
457414

415+
configure_crashdump = False
458416
crashdump_watchdog_timeout = parameter_dict.get("crashdump-watchdog", self.crashdump_watchdog_timeout_from_config)
459417
if self._should_configure_crashdump(crashdump_watchdog_timeout, watchdog_timeout, diskless=True):
418+
configure_crashdump = True
460419
self._check_kdump_service()
461-
result_dict = self._set_crashdump_in_sysconfig(crashdump_watchdog_timeout, diskless=True)
420+
result_dict = self._set_sbd_opts(crashdump_watchdog_timeout, diskless=True)
462421
update_dict = {**update_dict, **result_dict}
463422
sbd_watchdog_timeout = watchdog_timeout or self.watchdog_timeout_from_config
464423
stonith_watchdog_timeout = sbd_watchdog_timeout + crashdump_watchdog_timeout
@@ -478,7 +437,8 @@ def _configure_diskless(self, parameter_dict: dict):
478437
sbd_manager = sbd.SBDManager(
479438
timeout_dict=timeout_dict,
480439
update_dict=update_dict,
481-
diskless_sbd=True
440+
diskless_sbd=True,
441+
crashdump="set" if configure_crashdump else None
482442
)
483443
sbd_manager.init_and_deploy_sbd(restart_first)
484444

@@ -595,6 +555,28 @@ def do_configure(self, context, *args) -> bool:
595555
print(usage)
596556
return False
597557

558+
def _purge_crashdump(self):
    '''
    Remove the crashdump-related settings from the SBD configuration.

    Timeouts are reset to their values without the crashdump allowance:
      - devices configured: msgwait = 2 * runtime watchdog timeout
      - no device configured: stonith-watchdog = 2 * SBD_WATCHDOG_TIMEOUT
    SBD_OPTS is restored through _set_sbd_opts(restore=True), and an
    SBDManager created with crashdump="restore" deploys the result.
    '''
    devices = self.device_list_from_config
    if devices:
        watchdog = self.device_meta_dict_runtime.get("watchdog")
        timeout_dict = {"watchdog": watchdog}
        timeout_dict["msgwait"] = 2 * watchdog
        logger.info("Set msgwait-timeout to 2*watchdog-timeout: %s", timeout_dict["msgwait"])
    else:
        timeout_dict = {"stonith-watchdog": 2 * self.watchdog_timeout_from_config}
        logger.info("Set stonith-watchdog-timeout to 2*SBD_WATCHDOG_TIMEOUT: %s", timeout_dict["stonith-watchdog"])

    update_dict = self._set_sbd_opts(restore=True)

    # NOTE(review): diskless_sbd is left at its default even when no device
    # is configured, unlike _configure_diskless which passes
    # diskless_sbd=True -- confirm this is intended.
    manager = sbd.SBDManager(
        device_list_to_init=devices or None,
        timeout_dict=timeout_dict,
        update_dict=update_dict,
        crashdump="restore"
    )
    manager.init_and_deploy_sbd()
579+
598580
@command.completers(completers.choice(['crashdump']))
599581
def do_purge(self, context, *args) -> bool:
600582
'''
@@ -618,20 +600,17 @@ def do_purge(self, context, *args) -> bool:
618600

619601
utils.check_all_nodes_reachable("purging SBD")
620602

621-
with utils.leverage_maintenance_mode() as enabled:
622-
if not utils.able_to_restart_cluster(enabled):
623-
return False
603+
if purge_crashdump:
604+
self._purge_crashdump()
605+
else: # purge sbd from cluster
606+
with utils.leverage_maintenance_mode() as enabled:
607+
if not utils.able_to_restart_cluster(enabled):
608+
return False
624609

625-
if purge_crashdump:
626-
self._set_crashdump_option(delete=True)
627-
update_dict = self._set_crashdump_in_sysconfig(restore=True)
628-
if update_dict:
629-
sbd.SBDManager.update_sbd_configuration(update_dict)
630-
else:
631610
sbd.purge_sbd_from_cluster()
611+
bootstrap.restart_cluster()
632612

633-
bootstrap.restart_cluster()
634-
return True
613+
return True
635614

636615
def _print_sbd_type(self):
637616
if not self.service_manager.service_is_active(constants.SBD_SERVICE):

0 commit comments

Comments
 (0)