Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
212 changes: 212 additions & 0 deletions .github/scripts/verify-metric-descriptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
#!/usr/bin/env python3
"""
Verify that metric descriptions in Java code match the brief field in semantic conventions.
This script clones the semantic-conventions repo and compares metric descriptions.
"""

import os
import sys
import yaml
import subprocess
from pathlib import Path

# Java source roots (relative to the repository root) that hold the metric classes.
_API_SEMCONV_DIR = (
    "instrumentation-api/src/main/java/"
    "io/opentelemetry/instrumentation/api/semconv"
)
_INCUBATOR_SEMCONV_DIR = (
    "instrumentation-api-incubator/src/main/java/"
    "io/opentelemetry/instrumentation/api/incubator/semconv"
)

# One row per metric:
# (metric name, Java source root, Java file below that root, semconv model YAML).
_METRIC_SOURCES = (
    # RPC metrics
    ("rpc.server.duration", _INCUBATOR_SEMCONV_DIR,
     "rpc/RpcServerMetrics.java", "rpc/metrics.yaml"),
    ("rpc.client.duration", _INCUBATOR_SEMCONV_DIR,
     "rpc/RpcClientMetrics.java", "rpc/metrics.yaml"),
    # HTTP metrics
    ("http.server.request.duration", _API_SEMCONV_DIR,
     "http/HttpServerMetrics.java", "http/metrics.yaml"),
    ("http.client.request.duration", _API_SEMCONV_DIR,
     "http/HttpClientMetrics.java", "http/metrics.yaml"),
    ("http.server.active_requests", _INCUBATOR_SEMCONV_DIR,
     "http/HttpServerExperimentalMetrics.java", "http/metrics.yaml"),
    ("http.server.request.body.size", _INCUBATOR_SEMCONV_DIR,
     "http/HttpServerExperimentalMetrics.java", "http/metrics.yaml"),
    ("http.server.response.body.size", _INCUBATOR_SEMCONV_DIR,
     "http/HttpServerExperimentalMetrics.java", "http/metrics.yaml"),
    ("http.client.request.body.size", _INCUBATOR_SEMCONV_DIR,
     "http/HttpClientExperimentalMetrics.java", "http/metrics.yaml"),
    ("http.client.response.body.size", _INCUBATOR_SEMCONV_DIR,
     "http/HttpClientExperimentalMetrics.java", "http/metrics.yaml"),
    # Database metrics
    ("db.client.operation.duration", _INCUBATOR_SEMCONV_DIR,
     "db/DbClientMetrics.java", "database/metrics.yaml"),
    ("db.client.connection.count", _INCUBATOR_SEMCONV_DIR,
     "db/DbConnectionPoolMetrics.java", "database/metrics.yaml"),
    # GenAI metrics
    ("gen_ai.client.token.usage", _INCUBATOR_SEMCONV_DIR,
     "genai/GenAiClientMetrics.java", "gen-ai/metrics.yaml"),
    ("gen_ai.client.operation.duration", _INCUBATOR_SEMCONV_DIR,
     "genai/GenAiClientMetrics.java", "gen-ai/metrics.yaml"),
)

# Metrics to check: metric name -> where its description lives in the Java
# sources ("file") and in the semantic-conventions model ("yaml", "metric_id").
# Every semconv group id is the metric name prefixed with "metric.".
METRICS_TO_CHECK = {
    metric: {
        "file": f"{source_root}/{java_file}",
        "yaml": model_yaml,
        "metric_id": f"metric.{metric}",
    }
    for metric, source_root, java_file, model_yaml in _METRIC_SOURCES
}


def clone_semconv_repo(temp_dir):
    """Return the path of the semantic-conventions checkout under ``temp_dir``.

    A shallow (``--depth 1``) clone of the upstream repository is made only
    when ``<temp_dir>/semantic-conventions`` does not exist yet; an existing
    path is returned untouched. ``git`` is run with ``check=True``, so a
    failing clone raises ``subprocess.CalledProcessError``.
    """
    checkout = os.path.join(temp_dir, "semantic-conventions")
    if os.path.exists(checkout):
        # Already present: skip the clone entirely.
        return checkout

    print("Cloning semantic-conventions repository...")
    clone_cmd = [
        "git", "clone", "--depth", "1",
        "https://github.com/open-telemetry/semantic-conventions.git",
        checkout,
    ]
    subprocess.run(clone_cmd, check=True)
    return checkout


def get_semconv_description(semconv_path, yaml_file, metric_id):
    """Return the ``brief`` of one metric group from a semantic-conventions model file.

    Args:
        semconv_path: Root directory of the semantic-conventions checkout.
        yaml_file: Model file path relative to ``<semconv_path>/model``.
        metric_id: Value of the ``id`` field of the group to look up.

    Returns:
        The matching group's ``brief`` (surrounding whitespace stripped when it
        is a string), or ``None`` when the file does not exist, the document is
        empty or has no ``groups`` list, or no group has the requested id.
    """
    yaml_path = os.path.join(semconv_path, "model", yaml_file)
    if not os.path.isfile(yaml_path):
        return None

    # Read as UTF-8 explicitly instead of relying on the locale's default encoding.
    with open(yaml_path, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)

    # safe_load() yields None for an empty document and can yield a list or a
    # scalar for other content; only a mapping can carry a "groups" key.
    if not isinstance(data, dict):
        return None

    # "groups:" with no entries loads as None, hence the "or []".
    for group in data.get("groups") or []:
        if isinstance(group, dict) and group.get("id") == metric_id:
            brief = group.get("brief")
            # A brief written as a block scalar ("brief: >" / "brief: |") ends
            # with a newline that is a YAML artefact, not part of the text the
            # Java description is compared against.
            return brief.strip() if isinstance(brief, str) else brief

    return None


def extract_description_from_java(file_path, metric_name):
    """Extract the ``setDescription(...)`` text of one metric from a Java source file.

    The metric is located by a ``histogramBuilder`` / ``counterBuilder`` /
    ``upDownCounterBuilder`` / ``gaugeBuilder`` call whose arguments contain
    ``"<metric_name>"`` as a string literal. The description is then searched
    only up to the next such builder call, so a metric that has no
    ``setDescription`` of its own never reports the description of the metric
    declared after it.

    Args:
        file_path: Path of the Java source file.
        metric_name: Metric name as it appears in the builder call.

    Returns:
        The description text, with ``"a" + "b"`` concatenations joined and
        simple escapes (``\\"``, ``\\\\``, ``\\n``, ...) decoded, or ``None`` when
        the file, the builder call, or its ``setDescription`` cannot be found.
    """
    import re  # function-scope import: the module does not import re at file level

    if not os.path.isfile(file_path):
        return None

    # Read as UTF-8 explicitly instead of relying on the locale's default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    builder_call = r"(?:histogram|counter|upDownCounter|gauge)Builder\("
    # One Java string literal, allowing backslash-escaped characters inside.
    java_string = r'"(?:[^"\\]|\\.)*"'

    metric_match = re.search(
        rf'{builder_call}[^)]*"{re.escape(metric_name)}"[^)]*\)', content
    )
    if metric_match is None:
        return None

    # Restrict the search to this metric's builder chain: everything between
    # its builder call and the next builder call (or end of file).
    chain = content[metric_match.end():]
    next_builder = re.search(builder_call, chain)
    if next_builder is not None:
        chain = chain[:next_builder.start()]

    # The argument is one literal or several joined with '+', which is how
    # long descriptions end up wrapped across lines.
    desc_match = re.search(
        rf"\.setDescription\(\s*({java_string}(?:\s*\+\s*{java_string})*)\s*\)",
        chain,
    )
    if desc_match is None:
        return None

    simple_escapes = {
        "n": "\n", "t": "\t", "r": "\r", '"': '"', "'": "'", "\\": "\\",
    }

    def unescape(text):
        # Unknown escape sequences are left exactly as written.
        return re.sub(
            r"\\(.)",
            lambda m: simple_escapes.get(m.group(1), m.group(0)),
            text,
        )

    pieces = re.findall(r'"((?:[^"\\]|\\.)*)"', desc_match.group(1))
    return "".join(unescape(piece) for piece in pieces)


def main():
    """Compare each configured metric description with its semconv ``brief``.

    Works in ``/tmp/semconv-check``: obtains the semantic-conventions checkout
    there, then for every entry of ``METRICS_TO_CHECK`` prints a match,
    mismatch, or "could not look up" line. Metrics whose description cannot be
    extracted or found are reported but not counted as mismatches.

    Returns:
        1 when at least one description differs (the details are also written
        to ``/tmp/semconv-check/mismatches.txt``), otherwise 0.
    """
    repo_root = os.getcwd()
    work_dir = "/tmp/semconv-check"
    os.makedirs(work_dir, exist_ok=True)

    semconv_root = clone_semconv_repo(work_dir)

    # One (metric, java file, java text, semconv text) tuple per mismatch.
    differing = []

    for name, spec in METRICS_TO_CHECK.items():
        actual = extract_description_from_java(
            os.path.join(repo_root, spec["file"]), name
        )
        if actual is None:
            print(f"⚠️ {name}: Could not extract description from Java code")
            continue

        expected = get_semconv_description(
            semconv_root, spec["yaml"], spec["metric_id"]
        )
        if expected is None:
            print(f"⚠️ {name}: Not found in semantic conventions")
            continue

        if actual == expected:
            print(f"✅ {name}: Match")
            continue

        print(f"❌ {name}: MISMATCH")
        print(f" Java: '{actual}'")
        print(f" Semconv: '{expected}'")
        differing.append((name, spec["file"], actual, expected))

    if not differing:
        print("\n✅ All metric descriptions match semantic conventions!")
        return 0

    print(f"\n❌ Found {len(differing)} mismatch(es)!")

    # Write mismatches to a file for the workflow to use
    report_path = os.path.join(work_dir, "mismatches.txt")
    with open(report_path, 'w') as report:
        report.writelines(
            f"Metric: {metric}\n"
            f"File: {java_file}\n"
            f"Java description: {java_text}\n"
            f"Expected (semconv): {semconv_text}\n"
            "\n"
            for metric, java_file, java_text, semconv_text in differing
        )

    return 1


if __name__ == "__main__":
    # Use main()'s return value (0 or 1) as the process exit status.
    raise SystemExit(main())
125 changes: 125 additions & 0 deletions .github/workflows/verify-metric-descriptions.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
name: Verify metric descriptions match semantic conventions

on:
  schedule:
    # Run daily at 9:00 AM UTC
    - cron: "0 9 * * *"
  workflow_dispatch:

permissions:
  contents: read

jobs:
  # Runs the verification script and publishes its report as an artifact
  # when descriptions differ.
  check-descriptions:
    runs-on: ubuntu-latest
    outputs:
      mismatches-found: ${{ steps.check.outputs.mismatches-found }}
    steps:
      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

      - name: Set up Python
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyyaml

      - id: check
        name: Check metric descriptions
        run: |
          if ./.github/scripts/verify-metric-descriptions.py; then
            echo "mismatches-found=false" >> "$GITHUB_OUTPUT"
          elif [ -f /tmp/semconv-check/mismatches.txt ]; then
            # Non-zero exit plus a report file: the script found real mismatches
            echo "mismatches-found=true" >> "$GITHUB_OUTPUT"
            # Save the mismatches for the issue body
            cp /tmp/semconv-check/mismatches.txt "$RUNNER_TEMP/mismatches.txt"
          else
            # Non-zero exit without a report: the script itself failed
            # (clone error, exception, ...). Fail here instead of reporting
            # mismatches that do not exist.
            echo "verify-metric-descriptions.py failed without reporting mismatches" >&2
            exit 1
          fi

      - name: Upload mismatches
        if: steps.check.outputs.mismatches-found == 'true'
        # NOTE(review): the same SHA is used for download-artifact below, and the
        # version comment may not match this commit - confirm both pins.
        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v5.0.0
        with:
          name: mismatches
          path: ${{ runner.temp }}/mismatches.txt

  # Files one issue (via the otelbot app) describing the mismatches, unless an
  # open issue with the same title already exists.
  create-issue:
    permissions:
      contents: read # for actions/checkout
      issues: read # for gh issue list; the issue itself is created with the otelbot token
    runs-on: ubuntu-latest
    if: needs.check-descriptions.outputs.mismatches-found == 'true'
    needs:
      - check-descriptions
    env:
      # Shared by the duplicate check and the creation step so they cannot drift apart
      ISSUE_TITLE: Metric descriptions do not match semantic conventions
    steps:
      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

      - name: Download mismatches
        # NOTE(review): this is the same SHA as the upload-artifact pin above; two
        # different action repositories should not share a commit - confirm the
        # correct download-artifact SHA before merging.
        uses: actions/download-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v5.0.0
        with:
          name: mismatches
          path: ${{ runner.temp }}

      - name: Check if issue already exists
        id: check-issue
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          matches=$(gh issue list \
                      --author app/otelbot \
                      --state open \
                      --search "in:title \"$ISSUE_TITLE\"")
          if [ -n "$matches" ]
          then
            echo "already-opened=true" >> "$GITHUB_OUTPUT"
            echo "Issue already exists, skipping creation"
          else
            echo "already-opened=false" >> "$GITHUB_OUTPUT"
          fi

      - uses: actions/create-github-app-token@67018539274d69449ef7c02e8e71183d1719ab42 # v2.1.4
        if: steps.check-issue.outputs.already-opened != 'true'
        id: otelbot-token
        with:
          app-id: ${{ vars.OTELBOT_APP_ID }}
          private-key: ${{ secrets.OTELBOT_PRIVATE_KEY }}

      - name: Create issue about mismatches
        if: steps.check-issue.outputs.already-opened != 'true'
        env:
          GH_TOKEN: ${{ steps.otelbot-token.outputs.token }}
        run: |
          # Read the mismatches file
          mismatches_content=$(cat "$RUNNER_TEMP/mismatches.txt")

          # Create issue body
          body="## Metric Description Mismatches Found

          The automated verification found that some metric descriptions in the Java code do not match the \`brief\` field in the OpenTelemetry semantic conventions.

          ### Mismatches

          \`\`\`
          ${mismatches_content}
          \`\`\`

          ### Action Required

          Please review these mismatches and update the metric descriptions in the Java code to match the semantic conventions. The descriptions should exactly match the \`brief\` field in the corresponding YAML files in the [semantic-conventions repository](https://github.com/open-telemetry/semantic-conventions).

          ### Reference

          - Semantic conventions repository: https://github.com/open-telemetry/semantic-conventions
          - Related issue: #9478

          This issue was automatically created by the [verify-metric-descriptions workflow]($GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/workflows/verify-metric-descriptions.yml)."

          # Create the issue
          gh issue create \
            --title "$ISSUE_TITLE" \
            --body "$body" \
            --label "bug,metric"
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public ObservableLongMeasurement connections() {
.upDownCounterBuilder(metricName)
.setUnit(emitStableDatabaseSemconv() ? "{connection}" : "{connections}")
.setDescription(
"The number of connections that are currently in state described by the state attribute.")
"The number of connections that are currently in state described by the `state` attribute.")
.buildObserver();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ private GenAiClientMetrics(Meter meter) {
.histogramBuilder("gen_ai.client.token.usage")
.ofLongs()
.setUnit("{token}")
.setDescription("Measures number of input and output tokens used.")
.setDescription("Number of input and output tokens used.")
.setExplicitBucketBoundariesAdvice(GenAiMetricsAdvice.CLIENT_TOKEN_USAGE_BUCKETS);
GenAiMetricsAdvice.applyClientTokenUsageAdvice(tokenUsageBuilder);
this.tokenUsage = tokenUsageBuilder.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ private RpcClientMetrics(Meter meter) {
DoubleHistogramBuilder durationBuilder =
meter
.histogramBuilder("rpc.client.duration")
.setDescription("The duration of an outbound RPC invocation.")
.setDescription("Measures the duration of outbound RPC.")
.setUnit("ms");
RpcMetricsAdvice.applyClientDurationAdvice(durationBuilder);
clientDurationHistogram = durationBuilder.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ private RpcServerMetrics(Meter meter) {
DoubleHistogramBuilder durationBuilder =
meter
.histogramBuilder("rpc.server.duration")
.setDescription("The duration of an inbound RPC invocation.")
.setDescription("Measures the duration of inbound RPC.")
.setUnit("ms");
RpcMetricsAdvice.applyServerDurationAdvice(durationBuilder);
serverDurationHistogram = durationBuilder.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ void collectsMetrics() {
assertThat(metric)
.hasName("rpc.client.duration")
.hasUnit("ms")
.hasDescription("Measures the duration of outbound RPC.")
.hasHistogramSatisfying(
histogram ->
histogram.hasPointsSatisfying(
Expand Down Expand Up @@ -112,6 +113,7 @@ void collectsMetrics() {
assertThat(metric)
.hasName("rpc.client.duration")
.hasUnit("ms")
.hasDescription("Measures the duration of outbound RPC.")
.hasHistogramSatisfying(
histogram ->
histogram.hasPointsSatisfying(
Expand Down
Loading