From 284ea41323d22ba87b15e2cddf0cfdd8cd9ebdde Mon Sep 17 00:00:00 2001 From: Koen Martens Date: Mon, 13 Mar 2023 10:33:00 +0100 Subject: [PATCH] Allow usage of regex in `LOG_FILTER` setting Fix #2893 --- RELEASE.md | 4 ++++ THANKS | 1 + docs/settings.rst | 31 ++++++++++++++++-------- pelican/log.py | 39 +++++++++++++++++++++++++++--- pelican/settings.py | 3 ++- pelican/tests/test_log.py | 30 ++++++++++++++++++----- pelican/tests/test_settings.py | 44 +++++++++++++++++++++++++++++++++- 7 files changed, 131 insertions(+), 21 deletions(-) create mode 100644 RELEASE.md diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 000000000..0a251dd43 --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,4 @@ +Release type: minor + +Allow regular expressions in `LOG_FILTER` setting + diff --git a/THANKS b/THANKS index 28b438847..0f4e46f6a 100644 --- a/THANKS +++ b/THANKS @@ -97,6 +97,7 @@ Julian Berman Justin Mayer Kevin Deldycke Kevin Yap +Koen Martens Kyle Fuller Laureline Guerin Leonard Huang diff --git a/docs/settings.rst b/docs/settings.rst index a77685147..4e3d2fc4d 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -130,12 +130,18 @@ Basic settings .. data:: LOG_FILTER = [] - A list of tuples containing the logging level (up to ``warning``) and the - message to be ignored. + A list of tuples containing the type (either ``string`` or ``regex``), + the logging level (up to ``warning``) and a string. If the type is ``string`` + messages that are equal to the third argument are not shown. If the type is + ``regex``, the third argument is interpreted as a regular expression and any + message matching that will not be shown. Example:: - LOG_FILTER = [(logging.WARN, 'TAG_SAVE_AS is set to False')] + LOG_FILTER = [ + ('string', logging.WARN, 'Empty theme folder. Using `basic` theme.'), + ('regex', logging.WARN, r'Cannot get modification stamp for /foo/.*'), + ] .. 
data:: READERS = {} @@ -1304,15 +1310,19 @@ the **meaningful** error message in the middle of tons of annoying log output can be quite tricky. In order to filter out redundant log messages, Pelican comes with the ``LOG_FILTER`` setting. -``LOG_FILTER`` should be a list of tuples ``(level, msg)``, each of them being -composed of the logging level (up to ``warning``) and the message to be +``LOG_FILTER`` should be a list of tuples ``(type, level, msg_or_regexp)``, each +of them being composed of the type (``string`` or ``regex``), the logging level +(up to ``warning``) and the message or regular expression to be ignored. Simply populate the list with the log messages you want to hide, and they will be filtered out. For example:: import logging - LOG_FILTER = [(logging.WARN, 'TAG_SAVE_AS is set to False')] + LOG_FILTER = [ + ('string', logging.WARN, 'TAG_SAVE_AS is set to False'), + ('regex', logging.WARN, r'Cannot get modification stamp for /foo/.*'), + ] It is possible to filter out messages by a template. Check out source code to obtain a template. @@ -1320,13 +1330,14 @@ obtain a template. For example:: import logging - LOG_FILTER = [(logging.WARN, 'Empty alt attribute for image %s in %s')] + LOG_FILTER = [('string', logging.WARN, 'Empty alt attribute for image %s in %s')] .. Warning:: - Silencing messages by templates is a dangerous feature. It is possible to - unintentionally filter out multiple message types with the same template - (including messages from future Pelican versions). Proceed with caution. + Silencing messages by templates or regular expressions is a dangerous + feature. It is possible to unintentionally filter out multiple message + types with the same template (including messages from future Pelican + versions). Proceed with caution. ..
note:: diff --git a/pelican/log.py b/pelican/log.py index be176ea89..6f8b17962 100644 --- a/pelican/log.py +++ b/pelican/log.py @@ -1,11 +1,14 @@ import logging +import re +import warnings from collections import defaultdict from rich.console import Console from rich.logging import RichHandler __all__ = [ - 'init' + 'init', + 'LimitFilter', ] console = Console() @@ -23,11 +26,37 @@ class LimitFilter(logging.Filter): LOGS_DEDUP_MIN_LEVEL = logging.WARNING - _ignore = set() + ignore = set() + ignore_regexp = set() _raised_messages = set() _threshold = 5 _group_count = defaultdict(int) + @classmethod + def add_ignore_rule(cls, rule_specification): + if len(rule_specification) == 2: # old-style string or template + LimitFilter.ignore.add(rule_specification) + warnings.warn( + '2-tuple specification of LOG_FILTER item is deprecated,' + + 'replace with 3-tuple starting with \'string\' (see' + + 'documentation of LOG_FILTER for more details)', + FutureWarning + ) + elif len(rule_specification) == 3: # new-style string/template/regexp + if rule_specification[0] == "string": + LimitFilter.ignore.add(rule_specification[1:]) + elif rule_specification[0] == "regex": + regex = re.compile(rule_specification[2]) + LimitFilter.ignore_regexp.add((rule_specification[1], regex)) + else: + raise ValueError( + f"Invalid LOG_FILTER type '{rule_specification[0]}'" + ) + else: + raise ValueError( + f"Invalid item '{str(rule_specification)}' in LOG_FILTER" + ) + def filter(self, record): # don't limit log messages for anything above "warning" if record.levelno > self.LOGS_DEDUP_MIN_LEVEL: @@ -50,7 +79,11 @@ def filter(self, record): if logger_level > logging.DEBUG: template_key = (record.levelno, record.msg) message_key = (record.levelno, record.getMessage()) - if (template_key in self._ignore or message_key in self._ignore): + if template_key in self.ignore or message_key in self.ignore: + return False + if any(regexp[1].match(record.getMessage()) + for regexp in self.ignore_regexp + if 
regexp[0] == record.levelno): return False # check if we went over threshold diff --git a/pelican/settings.py b/pelican/settings.py index 9a54b2a68..70e3c34a7 100644 --- a/pelican/settings.py +++ b/pelican/settings.py @@ -518,7 +518,8 @@ def configure_settings(settings): # specify the log messages to be ignored log_filter = settings.get('LOG_FILTER', DEFAULT_CONFIG['LOG_FILTER']) - LimitFilter._ignore.update(set(log_filter)) + for item in log_filter: + LimitFilter.add_ignore_rule(item) # lookup the theme in "pelican/themes" if the given one doesn't exist if not os.path.isdir(settings['THEME']): diff --git a/pelican/tests/test_log.py b/pelican/tests/test_log.py index 1f2fb83a8..0191fe363 100644 --- a/pelican/tests/test_log.py +++ b/pelican/tests/test_log.py @@ -1,4 +1,5 @@ import logging +import re import unittest from collections import defaultdict from contextlib import contextmanager @@ -19,7 +20,8 @@ def tearDown(self): super().tearDown() def _reset_limit_filter(self): - log.LimitFilter._ignore = set() + log.LimitFilter.ignore = set() + log.LimitFilter.ignore_regexp = set() log.LimitFilter._raised_messages = set() log.LimitFilter._threshold = 5 log.LimitFilter._group_count = defaultdict(int) @@ -49,7 +51,7 @@ def do_logging(): # filter by template with self.reset_logger(): - log.LimitFilter._ignore.add((logging.WARNING, 'Log %s')) + log.LimitFilter.ignore.add((logging.WARNING, 'Log %s')) do_logging() self.assertEqual( self.handler.count_logs('Log \\d', logging.WARNING), @@ -60,7 +62,7 @@ def do_logging(): # filter by exact message with self.reset_logger(): - log.LimitFilter._ignore.add((logging.WARNING, 'Log 3')) + log.LimitFilter.ignore.add((logging.WARNING, 'Log 3')) do_logging() self.assertEqual( self.handler.count_logs('Log \\d', logging.WARNING), @@ -69,14 +71,30 @@ def do_logging(): self.handler.count_logs('Another log \\d', logging.WARNING), 5) - # filter by both + # filter by regular expression with self.reset_logger(): - 
log.LimitFilter._ignore.add((logging.WARNING, 'Log 3')) - log.LimitFilter._ignore.add((logging.WARNING, 'Another log %s')) + log.LimitFilter.ignore_regexp.add((logging.WARNING, + re.compile(r'Log.*'))) + log.LimitFilter.ignore_regexp.add((logging.WARNING, + re.compile(r'.*log 4'))) do_logging() self.assertEqual( self.handler.count_logs('Log \\d', logging.WARNING), + 0) + self.assertEqual( + self.handler.count_logs('Another log \\d', logging.WARNING), 4) + + # filter by all + with self.reset_logger(): + log.LimitFilter.ignore.add((logging.WARNING, 'Log 3')) + log.LimitFilter.ignore.add((logging.WARNING, 'Another log %s')) + log.LimitFilter.ignore_regexp.add((logging.WARNING, + re.compile(r'Lo.*4$'))) + do_logging() + self.assertEqual( + self.handler.count_logs('Log \\d', logging.WARNING), + 3) self.assertEqual( self.handler.count_logs('Another log \\d', logging.WARNING), 0) diff --git a/pelican/tests/test_settings.py b/pelican/tests/test_settings.py index 0f630ad55..89412830e 100644 --- a/pelican/tests/test_settings.py +++ b/pelican/tests/test_settings.py @@ -1,9 +1,11 @@ import copy import locale +import logging import os +import re from os.path import abspath, dirname, join - +from pelican.log import LimitFilter from pelican.settings import (DEFAULT_CONFIG, DEFAULT_THEME, _printf_s_to_format_field, configure_settings, @@ -108,6 +110,46 @@ def test_configure_settings(self): configure_settings(settings) self.assertEqual(settings['FEED_DOMAIN'], 'http://feeds.example.com') + def test_configure_log_filter_settings(self): + # Various forms of filter settings should be applied correctly. 
+ settings = { + 'LOG_FILTER': [ + (logging.WARNING, 'foo'), + ('string', logging.ERROR, 'bar'), + ('regex', logging.INFO, r'baz.*boo'), + ], + 'PATH': os.curdir, + 'THEME': DEFAULT_THEME, + } + with self.assertWarns( + FutureWarning, + msg='2-tuple specification of LOG_FILTER item is deprecated,' + + 'replace with 3-tuple starting with \'string\' (see' + + 'documentation of LOG_FILTER for more details)'): + configure_settings(settings) + + self.assertEqual(LimitFilter.ignore, { + (logging.WARNING, 'foo'), + (logging.ERROR, 'bar'), + }) + self.assertEqual(LimitFilter.ignore_regexp, { + (logging.INFO, re.compile(r'baz.*boo')) + }) + + settings['LOG_FILTER'] = [(1, 2, 3, 4)] + with self.assertRaisesRegex( + ValueError, + r"Invalid item '\(1, 2, 3, 4\)' in LOG_FILTER" + ): + configure_settings(settings) + + settings['LOG_FILTER'] = [('foo', 'bar', 'baz')] + with self.assertRaisesRegex( + ValueError, + r"Invalid LOG_FILTER type 'foo'" + ): + configure_settings(settings) + def test_theme_settings_exceptions(self): settings = self.settings