
Commit 70d0fd3

Fix logcontext problems in test_run_in_background_active_scope_still_available
This test is still expected to fail with our current `LogContextScopeManager` implementation, but there were some flaws in how we were handling the logcontext before (like pumping the reactor in a non-sentinel logcontext). This test originally comes from #18849, which spawned from #18804 (comment).
Parent: 06a84f4
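The core of the fix described above is to pump the reactor from the sentinel logcontext rather than from the test's own logcontext. A minimal, self-contained sketch of that pattern follows; the `root()` coroutine here is only a stand-in placeholder, while the real test in the diff below runs its own tasks and assertions:

    from twisted.internet import defer

    from synapse.logging.context import LoggingContext, PreserveLoggingContext
    from tests.server import get_clock

    reactor, clock = get_clock()

    async def root() -> None:
        # Placeholder for the real work; just yields to the reactor once.
        await clock.sleep(1)

    with LoggingContext(name="root logcontext", server_name="test_server"):
        # Kick off the async work while a non-sentinel logcontext is active.
        d = defer.ensureDeferred(root())

        # Pumping the reactor while still in "root logcontext" would leak that
        # logcontext into the reactor (and whatever it runs next), so reset to
        # the sentinel logcontext for the duration of the pump; the previous
        # logcontext is restored when the block exits.
        with PreserveLoggingContext():
            reactor.pump((2,) * 8)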


tests/logging/test_opentracing.py

Lines changed: 172 additions & 1 deletion
@@ -19,13 +19,15 @@
 #
 #

-from typing import Awaitable, Optional, cast
+from typing import Awaitable, Dict, Optional, cast

 from twisted.internet import defer
 from twisted.internet.testing import MemoryReactorClock

 from synapse.logging.context import (
     LoggingContext,
+    PreserveLoggingContext,
+    current_context,
     make_deferred_yieldable,
     run_in_background,
 )
@@ -40,6 +42,14 @@

 from tests.server import get_clock

+try:
+    import opentracing
+
+    from synapse.logging.scopecontextmanager import LogContextScopeManager
+except ImportError:
+    opentracing = None  # type: ignore
+    LogContextScopeManager = None  # type: ignore
+
 try:
     import jaeger_client
 except ImportError:
@@ -224,6 +234,167 @@ async def root() -> None:
             [scopes[1].span, scopes[2].span, scopes[0].span],
         )

+    def test_run_in_background_active_scope_still_available(self) -> None:
+        """
+        Test that tasks running via `run_in_background` still have access to the
+        active tracing scope.
+
+        This is a regression test for a previous Synapse issue where the tracing scope
+        would `__exit__` and close before the `run_in_background` task completed, and
+        our previous custom `_LogContextScope.close(...)` would clear
+        `LoggingContext.scope`, preventing further tracing spans from having the
+        correct parent.
+        """
+        reactor, clock = get_clock()
+
+        scope_map: Dict[str, opentracing.Scope] = {}
+
+        callback_finished = False
+
+        async def async_task() -> None:
+            nonlocal callback_finished
+            try:
+                logger.info("asdf async_task1 - %s", current_context().scope)
+                root_scope = scope_map["root"]
+                root_context = cast(jaeger_client.SpanContext, root_scope.span.context)
+
+                self.assertEqual(
+                    self._tracer.active_span,
+                    root_scope.span,
+                    "expected to inherit the root tracing scope from where this was run",
+                )
+
+                # Return control back to the reactor thread and wait an arbitrary amount
+                await clock.sleep(4)
+
+                logger.info("asdf async_task2 - %s", current_context().scope)
+
+                # This is a key part of what we're testing! In a previous version of
+                # Synapse, we would lose the active span at this point.
+                self.assertEqual(
+                    self._tracer.active_span,
+                    root_scope.span,
+                    "expected to still have a root tracing scope/span active",
+                )
+
+                # For completeness' sake, let's also trace more sub-tasks here and assert
+                # they have the correct span parents as well (root)
+
+                # Start tracing some other sub-task.
+                #
+                # This is a key part of what we're testing! In a previous version of
+                # Synapse, it would have the incorrect span parents.
+                scope = start_active_span(
+                    "task1",
+                    tracer=self._tracer,
+                )
+                scope_map["task1"] = scope
+
+                # Ensure the span parent is pointing to the root scope
+                context = cast(jaeger_client.SpanContext, scope.span.context)
+                self.assertEqual(
+                    context.parent_id,
+                    root_context.span_id,
+                    "expected task1 parent to be the root span",
+                )
+
+                # Ensure that the active span is our new sub-task now
+                self.assertEqual(self._tracer.active_span, scope.span)
+                # Return control back to the reactor thread and wait an arbitrary amount
+                await clock.sleep(4)
+                # We should still see the active span as the scope wasn't closed yet
+                self.assertEqual(self._tracer.active_span, scope.span)
+                # Close the scope using the context manager lifecycle (`scope.close()`)
+                with scope:
+                    pass
+            finally:
+                # When exceptions happen, we still want to mark the callback as finished
+                # so that the test can complete and we see the underlying error.
+                callback_finished = True
+
+        async def root() -> None:
+            with start_active_span(
+                "root span",
+                tracer=self._tracer,
+                # We will close this off later. We're basically just mimicking the same
+                # pattern for how we handle requests. We pass the span off to the
+                # request for it to finish.
+                finish_on_close=False,
+            ) as root_scope:
+                scope_map["root"] = root_scope
+                self.assertEqual(self._tracer.active_span, root_scope.span)
+
+                logger.info("asdf root1 - %s", current_context().scope)
+
+                # Fire-and-forget a task
+                #
+                # XXX: The root scope context manager will `__exit__` before this task
+                # completes.
+                run_in_background(async_task)
+
+                logger.info("asdf root2 - %s", current_context().scope)
+
+                # Because we used `run_in_background`, the active span should still be
+                # the root.
+                self.assertEqual(self._tracer.active_span, root_scope.span)
+
+            logger.info("asdf root3 - %s", current_context().scope)
+
+            # We shouldn't see any active spans outside of the scope
+            self.assertIsNone(self._tracer.active_span)
+
+        with LoggingContext(name="root logcontext", server_name="test_server"):
+            # Start the test off
+            d_root = defer.ensureDeferred(root())
+
+            # Pump the reactor until the tasks complete
+            #
+            # We're manually pumping the reactor (and causing any pending callbacks to
+            # be called) so we need to be in the sentinel logcontext to avoid leaking
+            # our current logcontext into the reactor (which would then get picked up
+            # and associated with the next thing the reactor does). `with
+            # PreserveLoggingContext()` will reset the logcontext to the sentinel while
+            # we're pumping the reactor in the block and return us back to our current
+            # logcontext after the block.
+            with PreserveLoggingContext():
+                reactor.pump((2,) * 8)
+                self.successResultOf(d_root)
+
+            self.assertTrue(
+                callback_finished,
+                "Callback never finished which means the test probably didn't wait long enough",
+            )
+
+            # After we see all of the tasks are done (like a request when it
+            # `_finished_processing`), let's finish our root span
+            scope_map["root"].span.finish()
+
+            # Sanity check again: we shouldn't see any active spans left over in
+            # this context.
+            self.assertIsNone(self._tracer.active_span)
+
+        # The spans should be reported in order of their finishing: task1, then
+        # root.
+        #
+        # We use `assertIncludes` just as an easier way to see if items are missing or
+        # added. We assert the order just below.
+        self.assertIncludes(
+            set(self._reporter.get_spans()),
+            {
+                scope_map["task1"].span,
+                scope_map["root"].span,
+            },
+            exact=True,
+        )
+        # This is where we actually assert the correct order
+        self.assertEqual(
+            self._reporter.get_spans(),
+            [
+                scope_map["task1"].span,
+                scope_map["root"].span,
+            ],
+        )
+
     def test_trace_decorator_sync(self) -> None:
         """
         Test whether we can use `@trace_with_opname` (`@trace`) and `@tag_args`
