Skip to content

Commit 7bf7202

Browse files
authored
Add Application Signals runtime metrics (#244)
## Feature request Add runtime metrics collection into Application Signals. Runtime metrics will be enabled by default if `OTEL_AWS_APPLICATION_SIGNALS_ENABLED` is `true`, and can be disabled separately by setting `OTEL_AWS_APPLICATION_SIGNALS_RUNTIME_ENABLED` to `false`. ## Description of changes: 1. Add `ScopeBasedPeriodicExportingMetricReader` to copy metrics from `opentelemetry.instrumentation.system_metrics` instrumentation scope to Application Signals exporter. 2. Set `aws.local.service` into resource attributes. 3. Add views to workaround open-telemetry/opentelemetry-python-contrib#2861. 4. Add contract testing for runtime metrics. ## Result example ``` { "resource_metrics": [ { "resource": { "attributes": { "telemetry.sdk.language": "python", "telemetry.sdk.name": "opentelemetry", "telemetry.sdk.version": "1.25.0", "service.name": "unknown_service", "cloud.provider": "aws", "cloud.platform": "aws_ec2", "cloud.account.id": "633750930120", "cloud.region": "us-east-1", "cloud.availability_zone": "us-east-1a", "host.id": "i-03ff80a878a803e0e", "host.type": "t2.medium", "host.name": "ip-172-31-25-215.ec2.internal", "telemetry.auto.version": "0.3.0.dev0-aws", "aws.local.service": "UnknownService" }, "schema_url": "" }, "scope_metrics": [ { "scope": { "name": "opentelemetry.instrumentation.system_metrics", "version": "0.46b0", "schema_url": "https://opentelemetry.io/schemas/1.11.0" }, "metrics": [ { "name": "process.runtime.cpython.memory", "description": "Runtime cpython memory", "unit": "bytes", "data": { "data_points": [ { "attributes": { "type": "rss" }, "start_time_unix_nano": 1724953385390606423, "time_unix_nano": 1724953385391126083, "value": 75747328 }, { "attributes": { "type": "vms" }, "start_time_unix_nano": 1724953385390606423, "time_unix_nano": 1724953385391126083, "value": 546709504 } ], "aggregation_temporality": 2, "is_monotonic": false } } ] } ] } ] } ``` By submitting this pull request, I confirm that my contribution is made under the terms of 
the Apache 2.0 license.
2 parents 83b87cf + acf7f68 commit 7bf7202

12 files changed

+519
-37
lines changed

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_metric_attribute_generator.py

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
AWS_STEPFUNCTIONS_ACTIVITY_ARN,
3030
AWS_STEPFUNCTIONS_STATEMACHINE_ARN,
3131
)
32+
from amazon.opentelemetry.distro._aws_resource_attribute_configurator import get_service_attribute
3233
from amazon.opentelemetry.distro._aws_span_processing_util import (
3334
GEN_AI_REQUEST_MODEL,
3435
LOCAL_ROOT,
@@ -37,7 +38,6 @@
3738
UNKNOWN_OPERATION,
3839
UNKNOWN_REMOTE_OPERATION,
3940
UNKNOWN_REMOTE_SERVICE,
40-
UNKNOWN_SERVICE,
4141
extract_api_path_value,
4242
get_egress_operation,
4343
get_ingress_operation,
@@ -54,12 +54,11 @@
5454
MetricAttributeGenerator,
5555
)
5656
from amazon.opentelemetry.distro.sqs_url_parser import SqsUrlParser
57-
from opentelemetry.sdk.resources import Resource, ResourceAttributes
57+
from opentelemetry.sdk.resources import Resource
5858
from opentelemetry.sdk.trace import BoundedAttributes, ReadableSpan
5959
from opentelemetry.semconv.trace import SpanAttributes
6060

6161
# Pertinent OTEL attribute keys
62-
_SERVICE_NAME: str = ResourceAttributes.SERVICE_NAME
6362
_DB_CONNECTION_STRING: str = SpanAttributes.DB_CONNECTION_STRING
6463
_DB_NAME: str = SpanAttributes.DB_NAME
6564
_DB_OPERATION: str = SpanAttributes.DB_OPERATION
@@ -103,10 +102,6 @@
103102
# Special DEPENDENCY attribute value if GRAPHQL_OPERATION_TYPE attribute key is present.
104103
_GRAPHQL: str = "graphql"
105104

106-
# As per https://opentelemetry.io/docs/specs/semconv/resource/#service, if service name is not specified, SDK defaults
107-
# the service name to unknown_service:<process name> or just unknown_service.
108-
_OTEL_UNKNOWN_SERVICE_PREFIX: str = "unknown_service"
109-
110105
_logger: Logger = getLogger(__name__)
111106

112107

@@ -152,15 +147,11 @@ def _generate_dependency_metric_attributes(span: ReadableSpan, resource: Resourc
152147

153148

154149
def _set_service(resource: Resource, span: ReadableSpan, attributes: BoundedAttributes) -> None:
155-
"""Service is always derived from SERVICE_NAME"""
156-
service: str = resource.attributes.get(_SERVICE_NAME)
157-
158-
# In practice the service name is never None, but we can be defensive here.
159-
if service is None or service.startswith(_OTEL_UNKNOWN_SERVICE_PREFIX):
150+
service_name, is_unknown = get_service_attribute(resource)
151+
if is_unknown:
160152
_log_unknown_attribute(AWS_LOCAL_SERVICE, span)
161-
service = UNKNOWN_SERVICE
162153

163-
attributes[AWS_LOCAL_SERVICE] = service
154+
attributes[AWS_LOCAL_SERVICE] = service_name
164155

165156

166157
def _set_ingress_operation(span: ReadableSpan, attributes: BoundedAttributes) -> None:
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
from amazon.opentelemetry.distro._aws_span_processing_util import UNKNOWN_SERVICE
4+
from opentelemetry.sdk.resources import SERVICE_NAME, Resource
5+
6+
# As per https://opentelemetry.io/docs/specs/semconv/resource/#service, if service name is not specified, SDK defaults
7+
# the service name to unknown_service:<process name> or just unknown_service.
8+
_OTEL_UNKNOWN_SERVICE_PREFIX: str = "unknown_service"
9+
10+
11+
def get_service_attribute(resource: Resource) -> "tuple[str, bool]":
    """Resolve the service name to report for ``resource``.

    The service is always derived from the resource's ``SERVICE_NAME`` attribute.

    Args:
        resource: Resource whose attributes are inspected.

    Returns:
        A ``(service_name, is_unknown)`` pair. ``is_unknown`` is ``True`` and ``service_name`` is
        ``UNKNOWN_SERVICE`` when the attribute is missing, is not a string, or starts with the
        ``unknown_service`` placeholder prefix; otherwise the attribute value is returned with ``False``.
    """
    service = resource.attributes.get(SERVICE_NAME)

    # In practice the service name is never None, but we can be defensive here. The isinstance check also
    # covers non-string attribute values, on which startswith() would raise AttributeError.
    if not isinstance(service, str) or service.startswith(_OTEL_UNKNOWN_SERVICE_PREFIX):
        return UNKNOWN_SERVICE, True

    return service, False

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/aws_opentelemetry_configurator.py

Lines changed: 125 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
# Modifications Copyright The OpenTelemetry Authors. Licensed under the Apache License 2.0 License.
44
import os
55
from logging import Logger, getLogger
6-
from typing import ClassVar, Dict, Type
6+
from typing import ClassVar, Dict, List, Type, Union
77

88
from importlib_metadata import version
99
from typing_extensions import override
1010

11+
from amazon.opentelemetry.distro._aws_attribute_keys import AWS_LOCAL_SERVICE
12+
from amazon.opentelemetry.distro._aws_resource_attribute_configurator import get_service_attribute
1113
from amazon.opentelemetry.distro.always_record_sampler import AlwaysRecordSampler
1214
from amazon.opentelemetry.distro.attribute_propagating_span_processor_builder import (
1315
AttributePropagatingSpanProcessorBuilder,
@@ -19,8 +21,11 @@
1921
from amazon.opentelemetry.distro.aws_span_metrics_processor_builder import AwsSpanMetricsProcessorBuilder
2022
from amazon.opentelemetry.distro.otlp_udp_exporter import OTLPUdpSpanExporter
2123
from amazon.opentelemetry.distro.sampler.aws_xray_remote_sampler import AwsXRayRemoteSampler
24+
from amazon.opentelemetry.distro.scope_based_exporter import ScopeBasedPeriodicExportingMetricReader
25+
from amazon.opentelemetry.distro.scope_based_filtering_view import ScopeBasedRetainingView
2226
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as OTLPHttpOTLPMetricExporter
2327
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
28+
from opentelemetry.metrics import set_meter_provider
2429
from opentelemetry.sdk._configuration import (
2530
_get_exporter_names,
2631
_get_id_generator,
@@ -29,7 +34,6 @@
2934
_import_id_generator,
3035
_import_sampler,
3136
_init_logging,
32-
_init_metrics,
3337
_OTelSDKConfigurator,
3438
)
3539
from opentelemetry.sdk.environment_variables import (
@@ -50,7 +54,13 @@
5054
ObservableUpDownCounter,
5155
UpDownCounter,
5256
)
53-
from opentelemetry.sdk.metrics.export import AggregationTemporality, PeriodicExportingMetricReader
57+
from opentelemetry.sdk.metrics.export import (
58+
AggregationTemporality,
59+
MetricExporter,
60+
MetricReader,
61+
PeriodicExportingMetricReader,
62+
)
63+
from opentelemetry.sdk.metrics.view import LastValueAggregation, View
5464
from opentelemetry.sdk.resources import Resource, get_aggregated_resources
5565
from opentelemetry.sdk.trace import TracerProvider
5666
from opentelemetry.sdk.trace.export import BatchSpanProcessor, SpanExporter
@@ -59,15 +69,17 @@
5969
from opentelemetry.semconv.resource import ResourceAttributes
6070
from opentelemetry.trace import set_tracer_provider
6171

62-
APP_SIGNALS_ENABLED_CONFIG = "OTEL_AWS_APP_SIGNALS_ENABLED"
72+
DEPRECATED_APP_SIGNALS_ENABLED_CONFIG = "OTEL_AWS_APP_SIGNALS_ENABLED"
6373
APPLICATION_SIGNALS_ENABLED_CONFIG = "OTEL_AWS_APPLICATION_SIGNALS_ENABLED"
64-
APP_SIGNALS_EXPORTER_ENDPOINT_CONFIG = "OTEL_AWS_APP_SIGNALS_EXPORTER_ENDPOINT"
74+
APPLICATION_SIGNALS_RUNTIME_ENABLED_CONFIG = "OTEL_AWS_APPLICATION_SIGNALS_RUNTIME_ENABLED"
75+
DEPRECATED_APP_SIGNALS_EXPORTER_ENDPOINT_CONFIG = "OTEL_AWS_APP_SIGNALS_EXPORTER_ENDPOINT"
6576
APPLICATION_SIGNALS_EXPORTER_ENDPOINT_CONFIG = "OTEL_AWS_APPLICATION_SIGNALS_EXPORTER_ENDPOINT"
6677
METRIC_EXPORT_INTERVAL_CONFIG = "OTEL_METRIC_EXPORT_INTERVAL"
6778
DEFAULT_METRIC_EXPORT_INTERVAL = 60000.0
6879
AWS_LAMBDA_FUNCTION_NAME_CONFIG = "AWS_LAMBDA_FUNCTION_NAME"
6980
AWS_XRAY_DAEMON_ADDRESS_CONFIG = "AWS_XRAY_DAEMON_ADDRESS"
7081
OTEL_AWS_PYTHON_DEFER_TO_WORKERS_ENABLED_CONFIG = "OTEL_AWS_PYTHON_DEFER_TO_WORKERS_ENABLED"
82+
SYSTEM_METRICS_INSTRUMENTATION_SCOPE_NAME = "opentelemetry.instrumentation.system_metrics"
7183
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT = "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"
7284
# UDP package size is not larger than 64KB
7385
LAMBDA_SPAN_EXPORT_BATCH_SIZE = 10
@@ -127,7 +139,7 @@ def _initialize_components():
127139
else []
128140
)
129141

130-
resource = get_aggregated_resources(resource_detectors).merge(Resource.create(auto_resource))
142+
resource = _customize_resource(get_aggregated_resources(resource_detectors).merge(Resource.create(auto_resource)))
131143

132144
sampler_name = _get_sampler()
133145
sampler = _custom_import_sampler(sampler_name, resource)
@@ -171,6 +183,27 @@ def _init_tracing(
171183
set_tracer_provider(trace_provider)
172184

173185

186+
def _init_metrics(
    exporters_or_readers: Dict[str, Union[Type[MetricExporter], Type[MetricReader]]],
    resource: Resource = None,
):
    """Build a MeterProvider from the configured exporter/reader classes and register it.

    Each configured class is instantiated with no arguments. MetricReader subclasses are used directly;
    anything else is treated as an exporter and wrapped in a PeriodicExportingMetricReader. The reader and
    view lists are then passed to _customize_metric_exporters before the provider is created and handed to
    set_meter_provider.

    Args:
        exporters_or_readers: Mapping whose values are exporter or reader classes; the keys are not used.
        resource: Resource passed to the MeterProvider.
    """
    views = []
    metric_readers = []

    for candidate_class in exporters_or_readers.values():
        # Classify before instantiating so the evaluation order matches the class check -> construct flow.
        is_reader = issubclass(candidate_class, MetricReader)
        instance = candidate_class()
        metric_readers.append(instance if is_reader else PeriodicExportingMetricReader(instance))

    _customize_metric_exporters(metric_readers, views)

    set_meter_provider(MeterProvider(resource=resource, metric_readers=metric_readers, views=views))
205+
206+
174207
# END The OpenTelemetry Authors code
175208

176209

@@ -303,14 +336,9 @@ def _customize_span_processors(provider: TracerProvider, resource: Resource) ->
303336
# Construct meterProvider
304337
_logger.info("AWS Application Signals enabled")
305338
otel_metric_exporter = ApplicationSignalsExporterProvider().create_exporter()
306-
export_interval_millis = float(os.environ.get(METRIC_EXPORT_INTERVAL_CONFIG, DEFAULT_METRIC_EXPORT_INTERVAL))
307-
_logger.debug("Span Metrics export interval: %s", export_interval_millis)
308-
# Cap export interval to 60 seconds. This is currently required for metrics-trace correlation to work correctly.
309-
if export_interval_millis > DEFAULT_METRIC_EXPORT_INTERVAL:
310-
export_interval_millis = DEFAULT_METRIC_EXPORT_INTERVAL
311-
_logger.info("AWS Application Signals metrics export interval capped to %s", export_interval_millis)
339+
312340
periodic_exporting_metric_reader = PeriodicExportingMetricReader(
313-
exporter=otel_metric_exporter, export_interval_millis=export_interval_millis
341+
exporter=otel_metric_exporter, export_interval_millis=_get_metric_export_interval()
314342
)
315343
meter_provider: MeterProvider = MeterProvider(resource=resource, metric_readers=[periodic_exporting_metric_reader])
316344
# Construct and set application signals metrics processor
@@ -319,25 +347,106 @@ def _customize_span_processors(provider: TracerProvider, resource: Resource) ->
319347
return
320348

321349

350+
def _customize_metric_exporters(metric_readers: List[MetricReader], views: List[View]) -> None:
    """Add the Application Signals runtime-metrics reader and views when runtime metrics are enabled.

    Both lists are mutated in place. Nothing happens when _is_application_signals_runtime_enabled()
    is false.

    Args:
        metric_readers: Readers configured so far; the scope-based reader is appended to it.
        views: View list that _get_runtime_metric_views extends.
    """
    if not _is_application_signals_runtime_enabled():
        return

    # Evaluated before the new reader is appended: true only when no other reader was configured.
    no_other_readers = len(metric_readers) == 0
    _get_runtime_metric_views(views, no_other_readers)

    runtime_reader = ScopeBasedPeriodicExportingMetricReader(
        exporter=ApplicationSignalsExporterProvider().create_exporter(),
        export_interval_millis=_get_metric_export_interval(),
        registered_scope_names={SYSTEM_METRICS_INSTRUMENTATION_SCOPE_NAME},
    )
    metric_readers.append(runtime_reader)
361+
362+
363+
def _get_runtime_metric_views(views: List[View], retain_runtime_only: bool) -> None:
    """Append the runtime-metric views for the system-metrics instrumentation scope to ``views``.

    One View with LastValueAggregation is appended per instrument name pattern listed below, each
    restricted to the system-metrics meter name.

    Args:
        views: List that is extended in place.
        retain_runtime_only: When true, a ScopeBasedRetainingView for the same meter name is appended last.
            NOTE(review): presumably this drops metrics from every other scope - confirm against
            ScopeBasedRetainingView.
    """
    scope_name = SYSTEM_METRICS_INSTRUMENTATION_SCOPE_NAME
    _logger.info("Registered scope %s", scope_name)

    last_value_instruments = (
        "system.network.connections",
        "process.open_file_descriptor.count",
        "process.runtime.*.memory",
        "process.runtime.*.gc_count",
        "process.runtime.*.thread_count",
    )
    for instrument_pattern in last_value_instruments:
        views.append(
            View(
                instrument_name=instrument_pattern,
                meter_name=scope_name,
                aggregation=LastValueAggregation(),
            )
        )

    if retain_runtime_only:
        views.append(ScopeBasedRetainingView(meter_name=scope_name))
403+
404+
322405
# NOTE(review): `any` in the annotations below is the builtin function, not typing.Any; left unchanged here
# because replacing it needs a new file-level import.
def _customize_versions(auto_resource: Dict[str, any]) -> Dict[str, any]:
    """Record the distro version in ``auto_resource`` and return the same dict.

    The installed ``aws-opentelemetry-distro`` version, suffixed with ``-aws``, is stored under
    ``ResourceAttributes.TELEMETRY_AUTO_VERSION``.

    Args:
        auto_resource: Attribute dict, mutated in place.

    Returns:
        The same ``auto_resource`` dict.
    """
    distro_version = version("aws-opentelemetry-distro")
    auto_resource[ResourceAttributes.TELEMETRY_AUTO_VERSION] = distro_version + "-aws"
    # Log message previously misspelled the package name as "opentelementry".
    _logger.debug("aws-opentelemetry-distro - version: %s", auto_resource[ResourceAttributes.TELEMETRY_AUTO_VERSION])
    return auto_resource
327410

328411

412+
def _customize_resource(resource: Resource) -> Resource:
    """Return ``resource`` merged with an ``AWS_LOCAL_SERVICE`` attribute.

    The attribute value is the service name returned by get_service_attribute; a debug message is logged
    when that helper reports the name as unknown.
    """
    local_service, unknown = get_service_attribute(resource)
    if unknown:
        _logger.debug("No valid service name found")

    service_resource = Resource.create({AWS_LOCAL_SERVICE: local_service})
    return resource.merge(service_resource)
418+
419+
329420
def _is_application_signals_enabled():
330421
return (
331-
os.environ.get(APPLICATION_SIGNALS_ENABLED_CONFIG, os.environ.get(APP_SIGNALS_ENABLED_CONFIG, "false")).lower()
422+
os.environ.get(
423+
APPLICATION_SIGNALS_ENABLED_CONFIG, os.environ.get(DEPRECATED_APP_SIGNALS_ENABLED_CONFIG, "false")
424+
).lower()
332425
== "true"
333426
)
334427

335428

429+
def _is_application_signals_runtime_enabled():
    """Report whether Application Signals runtime metrics should be collected.

    Requires Application Signals itself to be enabled; the runtime flag then defaults to "true" and is
    compared case-insensitively.
    """
    if not _is_application_signals_enabled():
        return False

    runtime_flag = os.environ.get(APPLICATION_SIGNALS_RUNTIME_ENABLED_CONFIG, "true")
    return runtime_flag.lower() == "true"
433+
434+
336435
def _is_lambda_environment():
    """Detect an AWS Lambda environment by the presence of the Lambda function-name variable."""
    lambda_function_name = os.environ.get(AWS_LAMBDA_FUNCTION_NAME_CONFIG)
    return lambda_function_name is not None
339438

340439

440+
def _get_metric_export_interval():
    """Return the metric export interval in milliseconds, capped at the default.

    The value is read from the ``METRIC_EXPORT_INTERVAL_CONFIG`` environment variable and falls back to
    ``DEFAULT_METRIC_EXPORT_INTERVAL`` when the variable is unset or cannot be parsed as a number.

    Returns:
        The interval as a float, never greater than ``DEFAULT_METRIC_EXPORT_INTERVAL``.
    """
    raw_interval = os.environ.get(METRIC_EXPORT_INTERVAL_CONFIG, DEFAULT_METRIC_EXPORT_INTERVAL)
    try:
        export_interval_millis = float(raw_interval)
    except ValueError:
        # A malformed environment value should not abort configuration; use the default instead.
        _logger.warning(
            "Invalid value %r for %s, using default %s",
            raw_interval,
            METRIC_EXPORT_INTERVAL_CONFIG,
            DEFAULT_METRIC_EXPORT_INTERVAL,
        )
        export_interval_millis = DEFAULT_METRIC_EXPORT_INTERVAL
    _logger.debug("Span Metrics export interval: %s", export_interval_millis)
    # Cap export interval to 60 seconds. This is currently required for metrics-trace correlation to work correctly.
    if export_interval_millis > DEFAULT_METRIC_EXPORT_INTERVAL:
        export_interval_millis = DEFAULT_METRIC_EXPORT_INTERVAL
        _logger.info("AWS Application Signals metrics export interval capped to %s", export_interval_millis)
    return export_interval_millis
448+
449+
341450
def _span_export_batch_size():
    """Return the Lambda span export batch size in a Lambda environment, otherwise ``None``."""
    if _is_lambda_environment():
        return LAMBDA_SPAN_EXPORT_BATCH_SIZE
    return None
343452

@@ -372,7 +481,7 @@ def create_exporter(self):
372481
if protocol == "http/protobuf":
373482
application_signals_endpoint = os.environ.get(
374483
APPLICATION_SIGNALS_EXPORTER_ENDPOINT_CONFIG,
375-
os.environ.get(APP_SIGNALS_EXPORTER_ENDPOINT_CONFIG, "http://localhost:4316/v1/metrics"),
484+
os.environ.get(DEPRECATED_APP_SIGNALS_EXPORTER_ENDPOINT_CONFIG, "http://localhost:4316/v1/metrics"),
376485
)
377486
_logger.debug("AWS Application Signals export endpoint: %s", application_signals_endpoint)
378487
return OTLPHttpOTLPMetricExporter(
@@ -388,7 +497,7 @@ def create_exporter(self):
388497

389498
application_signals_endpoint = os.environ.get(
390499
APPLICATION_SIGNALS_EXPORTER_ENDPOINT_CONFIG,
391-
os.environ.get(APP_SIGNALS_EXPORTER_ENDPOINT_CONFIG, "localhost:4315"),
500+
os.environ.get(DEPRECATED_APP_SIGNALS_EXPORTER_ENDPOINT_CONFIG, "localhost:4315"),
392501
)
393502
_logger.debug("AWS Application Signals export endpoint: %s", application_signals_endpoint)
394503
return OTLPGrpcOTLPMetricExporter(
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
from logging import Logger, getLogger
4+
from typing import Optional, Set
5+
6+
from opentelemetry.context import _SUPPRESS_INSTRUMENTATION_KEY, attach, detach, set_value
7+
from opentelemetry.sdk.metrics.export import MetricExporter, MetricsData, PeriodicExportingMetricReader, ResourceMetrics
8+
9+
_logger: Logger = getLogger(__name__)
10+
11+
12+
class ScopeBasedPeriodicExportingMetricReader(PeriodicExportingMetricReader):
    """Periodic metric reader that exports only metrics from registered instrumentation scopes.

    Scope metrics whose scope name is not in ``registered_scope_names`` are dropped before the data is
    handed to the exporter. Resource entries left with no scope metrics are dropped too, and the exporter
    is not called at all when nothing remains.
    """

    def __init__(
        self,
        exporter: MetricExporter,
        export_interval_millis: Optional[float] = None,
        export_timeout_millis: Optional[float] = None,
        registered_scope_names: Optional[Set[str]] = None,
    ):
        """Create the reader.

        Args:
            exporter: Exporter that receives the filtered metrics.
            export_interval_millis: Forwarded to PeriodicExportingMetricReader.
            export_timeout_millis: Forwarded to PeriodicExportingMetricReader.
            registered_scope_names: Instrumentation scope names to export. ``None`` is treated as an empty
                set, so nothing is exported.
        """
        # Normalise None to an empty set so the membership test in _receive_metrics cannot raise TypeError.
        # Assigned before the base initializer runs so the attribute exists by the time any export can
        # happen. NOTE(review): presumably the base class starts its export worker in __init__ - confirm.
        self._registered_scope_names = registered_scope_names if registered_scope_names is not None else set()
        super().__init__(exporter, export_interval_millis, export_timeout_millis)

    def _receive_metrics(
        self,
        metrics_data: MetricsData,
        timeout_millis: float = 10_000,
        **kwargs,
    ) -> None:
        """Filter ``metrics_data`` down to the registered scopes and export what remains.

        Exceptions raised while filtering or exporting are logged and not propagated.
        """
        token = attach(set_value(_SUPPRESS_INSTRUMENTATION_KEY, True))
        # pylint: disable=broad-exception-caught,invalid-name
        try:
            with self._export_lock:
                exporting_resource_metrics = []
                for metric in metrics_data.resource_metrics:
                    exporting_scope_metrics = [
                        scope_metric
                        for scope_metric in metric.scope_metrics
                        if scope_metric.scope.name in self._registered_scope_names
                    ]
                    if exporting_scope_metrics:
                        exporting_resource_metrics.append(
                            ResourceMetrics(
                                resource=metric.resource,
                                scope_metrics=exporting_scope_metrics,
                                schema_url=metric.schema_url,
                            )
                        )
                if exporting_resource_metrics:
                    new_metrics_data = MetricsData(resource_metrics=exporting_resource_metrics)
                    self._exporter.export(new_metrics_data, timeout_millis=timeout_millis)
        except Exception as e:
            _logger.exception("Exception while exporting metrics %s", str(e))
        finally:
            # Always restore the previous context, including when a non-Exception error propagates.
            detach(token)

0 commit comments

Comments
 (0)