Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(runtime metrics): fix telemetry reporting [backport 3.1] #12522

Open
wants to merge 1 commit into
base: 3.1
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ tests/internal @DataDog/apm-core-python
tests/lib-injection @DataDog/apm-core-python
tests/opentelemetry @DataDog/apm-core-python
tests/opentracer @DataDog/apm-core-python
tests/runtime @DataDog/apm-core-python
tests/tracer @DataDog/apm-core-python

# Test Visibility and related
Expand Down Expand Up @@ -166,7 +165,7 @@ ddtrace/internal/remoteconfig @DataDog/remote-config @DataDog/apm-core-pyt
tests/internal/remoteconfig @DataDog/remote-config @DataDog/apm-core-python

# API SDK
ddtrace/trace/ @DataDog/apm-sdk-api-python
ddtrace/trace/ @DataDog/apm-sdk-api-python
ddtrace/_trace/ @DataDog/apm-sdk-api-python
ddtrace/opentelemetry/ @DataDog/apm-sdk-api-python
ddtrace/internal/opentelemetry @DataDog/apm-sdk-api-python
Expand All @@ -179,9 +178,12 @@ ddtrace/sampler.py @DataDog/apm-sdk-api-python
ddtrace/sampling_rule.py @DataDog/apm-sdk-api-python
ddtrace/internal/sampling.py @DataDog/apm-sdk-api-python
ddtrace/internal/tracemethods.py @DataDog/apm-sdk-api-python
ddttace/settings/_otel_remapper.py @DataDog/apm-sdk-api-python
ddtrace/runtime/ @DataDog/apm-sdk-api-python
ddtrace/internal/runtime/ @DataDog/apm-sdk-api-python
ddtrace/settings/_otel_remapper.py @DataDog/apm-sdk-api-python
tests/integration/test_priority_sampling.py @DataDog/apm-sdk-api-python
tests/integration/test_propagation.py @DataDog/apm-sdk-api-python
tests/runtime/ @DataDog/apm-sdk-api-python
tests/test_sampling.py @DataDog/apm-sdk-api-python
tests/test_tracemethods.py @DataDog/apm-sdk-api-python
tests/opentelemetry/ @DataDog/apm-sdk-api-python
Expand Down
13 changes: 2 additions & 11 deletions ddtrace/internal/runtime/runtime_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import ddtrace
from ddtrace.internal import atexit
from ddtrace.internal import forksafe
from ddtrace.internal import telemetry
from ddtrace.vendor.dogstatsd import DogStatsd

from .. import periodic
Expand All @@ -20,8 +19,6 @@
from .tag_collectors import TracerTagCollector


TELEMETRY_RUNTIMEMETRICS_ENABLED = "DD_RUNTIME_METRICS_ENABLED"

log = get_logger(__name__)


Expand Down Expand Up @@ -75,13 +72,13 @@ class RuntimeWorker(periodic.PeriodicService):
_instance = None # type: ClassVar[Optional[RuntimeWorker]]
_lock = forksafe.Lock()

def __init__(self, interval=_get_interval_or_default(), tracer=ddtrace.tracer, dogstatsd_url=None) -> None:
def __init__(self, interval=_get_interval_or_default(), tracer=None, dogstatsd_url=None) -> None:
super().__init__(interval=interval)
self.dogstatsd_url: Optional[str] = dogstatsd_url
self._dogstatsd_client: DogStatsd = get_dogstatsd_client(
self.dogstatsd_url or ddtrace.internal.agent.get_stats_url()
)
self.tracer: Optional[ddtrace.trace.Tracer] = tracer
self.tracer: ddtrace.trace.Tracer = tracer or ddtrace.tracer
self._runtime_metrics: RuntimeMetrics = RuntimeMetrics()
self._platform_tags: List[str] = self._format_tags(PlatformTags())

Expand All @@ -107,9 +104,6 @@ def disable(cls):
cls._instance = None
cls.enabled = False

# Report status to telemetry
telemetry.telemetry_writer.add_configuration(TELEMETRY_RUNTIMEMETRICS_ENABLED, False, origin="unknown")

@classmethod
def _restart(cls):
cls.disable()
Expand All @@ -132,9 +126,6 @@ def enable(cls, flush_interval=None, tracer=None, dogstatsd_url=None):
cls._instance = runtime_worker
cls.enabled = True

# Report status to telemetry
telemetry.telemetry_writer.add_configuration(TELEMETRY_RUNTIMEMETRICS_ENABLED, True, origin="unknown")

def flush(self):
# type: () -> None
# Ensure runtime metrics have up-to-date tags (ex: service, env, version)
Expand Down
7 changes: 6 additions & 1 deletion ddtrace/runtime/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from typing import Optional # noqa:F401

import ddtrace.internal.runtime.runtime_metrics
from ddtrace.internal.telemetry import telemetry_writer


TELEMETRY_RUNTIMEMETRICS_ENABLED = "DD_RUNTIME_METRICS_ENABLED"


class _RuntimeMetricsStatus(type):
Expand Down Expand Up @@ -38,7 +42,7 @@ def enable(tracer=None, dogstatsd_url=None, flush_interval=None):
:param dogstatsd_url: The DogStatsD URL.
:param flush_interval: The flush interval.
"""

telemetry_writer.add_configuration(TELEMETRY_RUNTIMEMETRICS_ENABLED, True, origin="code")
ddtrace.internal.runtime.runtime_metrics.RuntimeWorker.enable(
tracer=tracer, dogstatsd_url=dogstatsd_url, flush_interval=flush_interval
)
Expand All @@ -52,6 +56,7 @@ def disable():
Once disabled, runtime metrics can be re-enabled by calling ``enable``
again.
"""
telemetry_writer.add_configuration(TELEMETRY_RUNTIMEMETRICS_ENABLED, False, origin="code")
ddtrace.internal.runtime.runtime_metrics.RuntimeWorker.disable()


Expand Down
74 changes: 8 additions & 66 deletions tests/runtime/test_runtime_metrics_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,51 +51,22 @@ def test_manually_start_runtime_metrics_telemetry(test_agent_session, run_python
"""
code = """
from ddtrace.internal.telemetry import telemetry_writer
telemetry_writer.start()

from ddtrace.runtime import RuntimeMetrics

assert not RuntimeMetrics._enabled
RuntimeMetrics.enable()
assert RuntimeMetrics._enabled

telemetry_writer.stop()
telemetry_writer.join(3)
telemetry_writer.periodic(force_flush=True)
"""

def find_telemetry_event(events, request_type):
e = [e for e in events if e["request_type"] == request_type]
assert len(e) == 1
return e[0]

_, stderr, status, _ = run_python_code_in_subprocess(code)
assert status == 0, stderr

events = test_agent_session.get_events(subprocess=True)
# app-started, app-closing, app-client-configuration-change, app-dependencies-loaded
assert len(events) == 4

# Note: the initial app-started event is going to say that it is not enabled, because
# we only look at the env variable DD_RUNTIME_METRICS_ENABLED to set the initial
# value.
#
# This test helps validate that manually enabling runtime metrics via code
will report the correct config status.
app_started_event = find_telemetry_event(events, "app-started")
config_changed_event = find_telemetry_event(events, "app-client-configuration-change")

runtimemetrics_enabled = [
c for c in app_started_event["payload"]["configuration"] if c["name"] == "runtimemetrics_enabled"
]
runtimemetrics_enabled = test_agent_session.get_configurations("DD_RUNTIME_METRICS_ENABLED")
assert len(runtimemetrics_enabled) == 1
assert runtimemetrics_enabled[0]["value"] is False
assert runtimemetrics_enabled[0]["origin"] == "unknown"

config = config_changed_event["payload"]["configuration"]
assert len(config) == 1
assert config[0]["name"] == "runtimemetrics_enabled"
assert config[0]["value"] is True
assert config[0]["origin"] == "unknown"
assert runtimemetrics_enabled[0]["value"]
assert runtimemetrics_enabled[0]["origin"] == "code"


def test_manually_stop_runtime_metrics_telemetry(test_agent_session, ddtrace_run_python_code_in_subprocess):
Expand All @@ -105,53 +76,24 @@ def test_manually_stop_runtime_metrics_telemetry(test_agent_session, ddtrace_run
"""
code = """
from ddtrace.internal.telemetry import telemetry_writer
telemetry_writer.start()

from ddtrace.runtime import RuntimeMetrics

assert RuntimeMetrics._enabled
RuntimeMetrics.disable()
assert not RuntimeMetrics._enabled

telemetry_writer.stop()
telemetry_writer.join(3)
telemetry_writer.periodic(force_flush=True)
"""

def find_telemetry_event(events, request_type):
e = [e for e in events if e["request_type"] == request_type]
assert len(e) == 1
return e[0]

env = os.environ.copy()
env["DD_RUNTIME_METRICS_ENABLED"] = "true"
_, stderr, status, _ = ddtrace_run_python_code_in_subprocess(code, env=env)
assert status == 0, stderr

events = test_agent_session.get_events(subprocess=True)
# app-started, app-closing, app-client-configuration-change, app-integrations-change, app-dependencies-loaded
assert len(events) == 5

# Note: the initial app-started event is going to say that it is enabled, because we
# set DD_RUNTIME_METRICS_ENABLED=true and are using ddtrace-run which will enable
# runtime metrics for us.
#
# This test helps validate that manually disabling runtime metrics via code
will report the correct config status.
app_started_event = find_telemetry_event(events, "app-started")
config_changed_event = find_telemetry_event(events, "app-client-configuration-change")

runtimemetrics_enabled = [
c for c in app_started_event["payload"]["configuration"] if c["name"] == "runtimemetrics_enabled"
]
runtimemetrics_enabled = test_agent_session.get_configurations("DD_RUNTIME_METRICS_ENABLED")
assert len(runtimemetrics_enabled) == 1
assert runtimemetrics_enabled[0]["value"] is True
assert runtimemetrics_enabled[0]["origin"] == "unknown"

config = config_changed_event["payload"]["configuration"]
assert len(config) == 1
assert config[0]["name"] == "runtimemetrics_enabled"
assert config[0]["value"] is False
assert config[0]["origin"] == "unknown"
assert runtimemetrics_enabled[0]["value"] is False
assert runtimemetrics_enabled[0]["origin"] == "code"


def test_start_runtime_metrics_via_env_var(monkeypatch, ddtrace_run_python_code_in_subprocess):
Expand Down
4 changes: 2 additions & 2 deletions tests/telemetry/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def test_app_started_event(telemetry_writer, test_agent_session, mock_time):
{"name": "DD_PROFILING_MAX_FRAMES", "origin": "unknown", "value": 64},
{"name": "DD_REMOTE_CONFIGURATION_ENABLED", "origin": "unknown", "value": False},
{"name": "DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS", "origin": "unknown", "value": 5.0},
{"name": "DD_RUNTIME_METRICS_ENABLED", "origin": "unknown", "value": False},
{"name": "DD_RUNTIME_METRICS_ENABLED", "origin": "unknown", "value": True},
{"name": "DD_SERVICE_MAPPING", "origin": "unknown", "value": ""},
{"name": "DD_SPAN_SAMPLING_RULES", "origin": "unknown", "value": None},
{"name": "DD_SPAN_SAMPLING_RULES_FILE", "origin": "unknown", "value": None},
Expand Down Expand Up @@ -434,7 +434,7 @@ def test_app_started_event_configuration_override(test_agent_session, run_python
{"name": "DD_PROFILING__FORCE_LEGACY_EXPORTER", "origin": "default", "value": False},
{"name": "DD_REMOTE_CONFIGURATION_ENABLED", "origin": "env_var", "value": True},
{"name": "DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS", "origin": "env_var", "value": 1.0},
{"name": "DD_RUNTIME_METRICS_ENABLED", "origin": "unknown", "value": False},
{"name": "DD_RUNTIME_METRICS_ENABLED", "origin": "env_var", "value": True},
{"name": "DD_SERVICE", "origin": "default", "value": DEFAULT_DDTRACE_SUBPROCESS_TEST_SERVICE_NAME},
{"name": "DD_SERVICE_MAPPING", "origin": "env_var", "value": "default_dd_service:remapped_dd_service"},
{"name": "DD_SITE", "origin": "env_var", "value": "datadoghq.com"},
Expand Down