diff --git a/changelog.d/18849.misc b/changelog.d/18849.misc new file mode 100644 index 00000000000..2fa37692eff --- /dev/null +++ b/changelog.d/18849.misc @@ -0,0 +1 @@ +Switch to OpenTracing's `ContextVarsScopeManager` instead of our own custom `LogContextScopeManager`. diff --git a/synapse/logging/context.py b/synapse/logging/context.py index 3ef97f23c96..7ea3f3d726d 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -56,7 +56,6 @@ from twisted.python.threadpool import ThreadPool if TYPE_CHECKING: - from synapse.logging.scopecontextmanager import _LogContextScope from synapse.types import ISynapseReactor logger = logging.getLogger(__name__) @@ -230,14 +229,13 @@ class ContextRequest: class _Sentinel: """Sentinel to represent the root context""" - __slots__ = ["previous_context", "finished", "request", "scope", "tag"] + __slots__ = ["previous_context", "finished", "request", "tag"] def __init__(self) -> None: # Minimal set for compatibility with LoggingContext self.previous_context = None self.finished = False self.request = None - self.scope = None self.tag = None def __str__(self) -> str: @@ -290,7 +288,6 @@ class LoggingContext: "finished", "request", "tag", - "scope", ] def __init__( @@ -311,7 +308,6 @@ def __init__( self.main_thread = get_thread_id() self.request = None self.tag = "" - self.scope: Optional["_LogContextScope"] = None # keep track of whether we have hit the __exit__ block for this context # (suggesting that the the thing that created the context thinks it should @@ -324,9 +320,6 @@ def __init__( # we track the current request_id self.request = self.parent_context.request - # we also track the current scope: - self.scope = self.parent_context.scope - if request is not None: # the request param overrides the request from the parent context self.request = request diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 46fdd8ab623..b596b1abdbf 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -251,18 +251,17 @@ class _DummyTagNames: try: import opentracing import opentracing.tags + from opentracing.scope_managers.contextvars import ContextVarsScopeManager tags = opentracing.tags except ImportError: opentracing = None # type: ignore[assignment] tags = _DummyTagNames # type: ignore[assignment] + ContextVarsScopeManager = None # type: ignore try: from jaeger_client import Config as JaegerConfig - - from synapse.logging.scopecontextmanager import LogContextScopeManager except ImportError: JaegerConfig = None # type: ignore - LogContextScopeManager = None # type: ignore try: @@ -484,7 +483,7 @@ def init_tracer(hs: "HomeServer") -> None: config = JaegerConfig( config=jaeger_config, service_name=f"{hs.config.server.server_name} {instance_name_by_type}", - scope_manager=LogContextScopeManager(), + scope_manager=ContextVarsScopeManager(), metrics_factory=PrometheusMetricsFactory(), ) diff --git a/synapse/logging/scopecontextmanager.py b/synapse/logging/scopecontextmanager.py deleted file mode 100644 index feaadc4d87a..00000000000 --- a/synapse/logging/scopecontextmanager.py +++ /dev/null @@ -1,161 +0,0 @@ -# -# This file is licensed under the Affero General Public License (AGPL) version 3. -# -# Copyright 2019 The Matrix.org Foundation C.I.C. -# Copyright (C) 2023 New Vector, Ltd -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# See the GNU Affero General Public License for more details: -# . -# -# Originally licensed under the Apache License, Version 2.0: -# . -# -# [This file includes modifications made by New Vector Limited] -# -# - -import logging -from typing import Optional - -from opentracing import Scope, ScopeManager, Span - -from synapse.logging.context import ( - LoggingContext, - current_context, - nested_logging_context, -) - -logger = logging.getLogger(__name__) - - -class LogContextScopeManager(ScopeManager): - """ - The LogContextScopeManager tracks the active scope in opentracing - by using the log contexts which are native to synapse. This is so - that the basic opentracing api can be used across twisted defereds. - - It would be nice just to use opentracing's ContextVarsScopeManager, - but currently that doesn't work due to https://twistedmatrix.com/trac/ticket/10301. - """ - - def __init__(self) -> None: - pass - - @property - def active(self) -> Optional[Scope]: - """ - Returns the currently active Scope which can be used to access the - currently active Scope.span. - If there is a non-null Scope, its wrapped Span - becomes an implicit parent of any newly-created Span at - Tracer.start_active_span() time. - - Return: - The Scope that is active, or None if not available. - """ - ctx = current_context() - return ctx.scope - - def activate(self, span: Span, finish_on_close: bool) -> Scope: - """ - Makes a Span active. - Args - span: the span that should become active. - finish_on_close: whether Span should be automatically finished when - Scope.close() is called. - - Returns: - Scope to control the end of the active period for - *span*. It is a programming error to neglect to call - Scope.close() on the returned instance. - """ - - ctx = current_context() - - if not ctx: - logger.error("Tried to activate scope outside of loggingcontext") - return Scope(None, span) # type: ignore[arg-type] - - if ctx.scope is not None: - # start a new logging context as a child of the existing one. - # Doing so -- rather than updating the existing logcontext -- means that - # creating several concurrent spans under the same logcontext works - # correctly. - ctx = nested_logging_context("") - enter_logcontext = True - else: - # if there is no span currently associated with the current logcontext, we - # just store the scope in it. - # - # This feels a bit dubious, but it does hack around a problem where a - # span outlasts its parent logcontext (which would otherwise lead to - # "Re-starting finished log context" errors). - enter_logcontext = False - - scope = _LogContextScope(self, span, ctx, enter_logcontext, finish_on_close) - ctx.scope = scope - if enter_logcontext: - ctx.__enter__() - - return scope - - -class _LogContextScope(Scope): - """ - A custom opentracing scope, associated with a LogContext - - * When the scope is closed, the logcontext's active scope is reset to None. - and - if enter_logcontext was set - the logcontext is finished too. - """ - - def __init__( - self, - manager: LogContextScopeManager, - span: Span, - logcontext: LoggingContext, - enter_logcontext: bool, - finish_on_close: bool, - ): - """ - Args: - manager: - the manager that is responsible for this scope. - span: - the opentracing span which this scope represents the local - lifetime for. - logcontext: - the log context to which this scope is attached. - enter_logcontext: - if True the log context will be exited when the scope is finished - finish_on_close: - if True finish the span when the scope is closed - """ - super().__init__(manager, span) - self.logcontext = logcontext - self._finish_on_close = finish_on_close - self._enter_logcontext = enter_logcontext - - def __str__(self) -> str: - return f"Scope<{self.span}>" - - def close(self) -> None: - active_scope = self.manager.active - if active_scope is not self: - logger.error( - "Closing scope %s which is not the currently-active one %s", - self, - active_scope, - ) - - if self._finish_on_close: - self.span.finish() - - self.logcontext.scope = None - - if self._enter_logcontext: - self.logcontext.__exit__(None, None, None) diff --git a/tests/logging/test_opentracing.py b/tests/logging/test_opentracing.py index 7c5046babaf..5fe57d100ec 100644 --- a/tests/logging/test_opentracing.py +++ b/tests/logging/test_opentracing.py @@ -19,7 +19,7 @@ # # -from typing import Awaitable, cast +from typing import Awaitable, Dict, cast from twisted.internet import defer from twisted.internet.testing import MemoryReactorClock @@ -38,9 +38,11 @@ from synapse.util import Clock try: - from synapse.logging.scopecontextmanager import LogContextScopeManager + import opentracing + from opentracing.scope_managers.contextvars import ContextVarsScopeManager except ImportError: - LogContextScopeManager = None # type: ignore + opentracing = None # type: ignore + ContextVarsScopeManager = None # type: ignore try: import jaeger_client @@ -54,9 +56,10 @@ logger = logging.getLogger(__name__) -class LogContextScopeManagerTestCase(TestCase): +class TracingScopeTestCase(TestCase): """ - Test logging contexts and active opentracing spans. + Test that our tracing machinery works well in a variety of situations (especially + with Twisted's runtime and deferreds). There's casts throughout this from generic opentracing objects (e.g. opentracing.Span) to the ones specific to Jaeger since they have additional @@ -64,7 +67,7 @@ class LogContextScopeManagerTestCase(TestCase): opentracing backend is Jaeger. """ - if LogContextScopeManager is None: + if opentracing is None: skip = "Requires opentracing" # type: ignore[unreachable] if jaeger_client is None: skip = "Requires jaeger_client" # type: ignore[unreachable] @@ -74,7 +77,7 @@ def setUp(self) -> None: # global variables that power opentracing. We create our own tracer instance # and test with it. - scope_manager = LogContextScopeManager() + scope_manager = ContextVarsScopeManager() config = jaeger_client.config.Config( config={}, service_name="test", scope_manager=scope_manager ) @@ -208,6 +211,135 @@ async def root() -> None: [scopes[1].span, scopes[2].span, scopes[0].span], ) + def test_run_in_background_active_scope_still_available(self) -> None: + """ + Test that tasks running via `run_in_background` still have access to the + active tracing scope. + + This is a regression test for a previous Synapse issue where the tracing scope + would `__exit__` and close before the `run_in_background` task completed and our + own previous custom `_LogContextScope.close(...)` would clear + `LoggingContext.scope` preventing further tracing spans from having the correct + parent. + """ + reactor = MemoryReactorClock() + clock = Clock(reactor) + + scope_map: Dict[str, opentracing.Scope] = {} + + async def async_task() -> None: + root_scope = scope_map["root"] + root_context = cast(jaeger_client.SpanContext, root_scope.span.context) + + self.assertEqual( + self._tracer.active_span, + root_scope.span, + "expected to inherit the root tracing scope from where this was run", + ) + + # Return control back to the reactor thread and wait an arbitrary amount + await clock.sleep(4) + + # This is a key part of what we're testing! In a previous version of + # Synapse, we would lose the active span at this point. + self.assertEqual( + self._tracer.active_span, + root_scope.span, + "expected to still have a root tracing scope/span active", + ) + + # For complete-ness sake, let's also trace more sub-tasks here and assert + # they have the correct span parents as well (root) + + # Start tracing some other sub-task. + # + # This is a key part of what we're testing! In a previous version of + # Synapse, it would have the incorrect span parents. + scope = start_active_span( + "task1", + tracer=self._tracer, + ) + scope_map["task1"] = scope + + # Ensure the span parent is pointing to the root scope + context = cast(jaeger_client.SpanContext, scope.span.context) + self.assertEqual( + context.parent_id, + root_context.span_id, + "expected task1 parent to be the root span", + ) + + # Ensure that the active span is our new sub-task now + self.assertEqual(self._tracer.active_span, scope.span) + # Return control back to the reactor thread and wait an arbitrary amount + await clock.sleep(4) + # We should still see the active span as the scope wasn't closed yet + self.assertEqual(self._tracer.active_span, scope.span) + scope.close() + + async def root() -> None: + with start_active_span( + "root span", + tracer=self._tracer, + # We will close this off later. We're basically just mimicking the same + # pattern for how we handle requests. We pass the span off to the + # request for it to finish. + finish_on_close=False, + ) as root_scope: + scope_map["root"] = root_scope + self.assertEqual(self._tracer.active_span, root_scope.span) + + # Fire-and-forget a task + # + # XXX: The root scope context manager will `__exit__` before this task + # completes. + run_in_background(async_task) + + # Because we used `run_in_background`, the active span should still be + # the root. + self.assertEqual(self._tracer.active_span, root_scope.span) + + # We shouldn't see any active spans outside of the scope + self.assertIsNone(self._tracer.active_span) + + with LoggingContext("root context"): + # Start the test off + d_root = defer.ensureDeferred(root()) + + # Let the tasks complete + reactor.pump((2,) * 8) + self.successResultOf(d_root) + + # After we see all of the tasks are done (like a request when it + # `_finished_processing`), let's finish our root span + scope_map["root"].span.finish() + + # Sanity check again: We shouldn't see any active spans leftover in this + # this context. + self.assertIsNone(self._tracer.active_span) + + # The spans should be reported in order of their finishing: task 1, task 2, + # root. + # + # We use `assertIncludes` just as an easier way to see if items are missing or + # added. We assert the order just below + self.assertIncludes( + set(self._reporter.get_spans()), + { + scope_map["task1"].span, + scope_map["root"].span, + }, + exact=True, + ) + # This is where we actually assert the correct order + self.assertEqual( + self._reporter.get_spans(), + [ + scope_map["task1"].span, + scope_map["root"].span, + ], + ) + def test_trace_decorator_sync(self) -> None: """ Test whether we can use `@trace_with_opname` (`@trace`) and `@tag_args`