Skip to content

Commit 9653af9

Browse files
committed
[o11y] Report SpanOpen event earlier
1 parent 4d90cb5 commit 9653af9

File tree

8 files changed

+73
-42
lines changed

8 files changed

+73
-42
lines changed

src/workerd/api/actor-kv-test-tail.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ export default testTailHandler;
1111
export const test = {
1212
async test(ctrl, env, ctx) {
1313
const expected = [
14+
{ name: 'durable_object_subrequest', closed: true },
1415
{ name: 'durable_object_storage_put', closed: true },
1516
{ name: 'durable_object_storage_put', closed: true },
1617
{ name: 'durable_object_storage_get', closed: true },
@@ -23,7 +24,6 @@ export const test = {
2324
{ name: 'durable_object_storage_deleteAlarm', closed: true },
2425
{ name: 'durable_object_storage_transaction', closed: true },
2526
{ name: 'durable_object_storage_sync', closed: true },
26-
{ name: 'durable_object_subrequest', closed: true },
2727
];
2828

2929
await Promise.allSettled(invocationPromises);

src/workerd/api/r2-instrumentation-test.js

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -192,12 +192,19 @@ export const test = {
192192
closed: true,
193193
},
194194
{
195-
name: 'r2_put',
195+
name: 'r2_get',
196196
'cloudflare.binding.type': 'r2',
197197
'cloudflare.binding.name': 'BUCKET',
198-
'cloudflare.r2.operation': 'PutObject',
198+
'cloudflare.r2.operation': 'GetObject',
199199
'cloudflare.r2.bucket': 'r2-test',
200-
'cloudflare.r2.request.key': 'throwOnInvalidEtag',
200+
'cloudflare.r2.request.key': 'rangeSuff',
201+
'cloudflare.r2.request.range.suffix': 2n,
202+
'cloudflare.r2.response.success': true,
203+
'cloudflare.r2.response.etag': 'objectEtag',
204+
'cloudflare.r2.response.size': 123,
205+
'cloudflare.r2.response.uploaded': '2024-08-27T14:00:57.918Z',
206+
'cloudflare.r2.response.storage_class': 'Standard',
207+
'cloudflare.r2.response.custom_metadata': true,
201208
closed: true,
202209
},
203210
{
@@ -210,19 +217,12 @@ export const test = {
210217
closed: true,
211218
},
212219
{
213-
name: 'r2_get',
220+
name: 'r2_put',
214221
'cloudflare.binding.type': 'r2',
215222
'cloudflare.binding.name': 'BUCKET',
216-
'cloudflare.r2.operation': 'GetObject',
223+
'cloudflare.r2.operation': 'PutObject',
217224
'cloudflare.r2.bucket': 'r2-test',
218-
'cloudflare.r2.request.key': 'rangeSuff',
219-
'cloudflare.r2.request.range.suffix': 2n,
220-
'cloudflare.r2.response.success': true,
221-
'cloudflare.r2.response.etag': 'objectEtag',
222-
'cloudflare.r2.response.size': 123,
223-
'cloudflare.r2.response.uploaded': '2024-08-27T14:00:57.918Z',
224-
'cloudflare.r2.response.storage_class': 'Standard',
225-
'cloudflare.r2.response.custom_metadata': true,
225+
'cloudflare.r2.request.key': 'throwOnInvalidEtag',
226226
closed: true,
227227
},
228228
{

src/workerd/api/tail-worker-test.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/workerd/io/trace.c++

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1469,7 +1469,10 @@ SpanBuilder::SpanBuilder(kj::Maybe<kj::Own<SpanObserver>> observer,
14691469
KJ_IF_SOME(obs, observer) {
14701470
// TODO(o11y): Once we report the user tracing spanOpen event as soon as a span is created, we
14711471
// should be able to fold this virtual call and just get the timestamp directly.
1472-
span.emplace(kj::mv(operationName), startTime.orDefault(obs->getTime()));
1472+
kj::Date time = startTime.orDefault([&]() { return obs->getTime(); });
1473+
// Report spanOpen event for user tracing spans
1474+
obs->reportStart(operationName, time);
1475+
span.emplace(kj::mv(operationName), time);
14731476
this->observer = kj::mv(obs);
14741477
}
14751478
}
@@ -1612,7 +1615,6 @@ void CompleteSpan::copyTo(rpc::UserSpanData::Builder builder) const {
16121615
builder.setStartTimeNs((startTime - kj::UNIX_EPOCH) / kj::NANOSECONDS);
16131616
builder.setEndTimeNs((endTime - kj::UNIX_EPOCH) / kj::NANOSECONDS);
16141617
builder.setSpanId(spanId);
1615-
builder.setParentSpanId(parentSpanId);
16161618

16171619
auto tagsParam = builder.initTags(tags.size());
16181620
auto i = 0;
@@ -1625,7 +1627,6 @@ void CompleteSpan::copyTo(rpc::UserSpanData::Builder builder) const {
16251627

16261628
CompleteSpan::CompleteSpan(rpc::UserSpanData::Reader reader)
16271629
: spanId(reader.getSpanId()),
1628-
parentSpanId(reader.getParentSpanId()),
16291630
operationName(kj::str(reader.getOperationName())),
16301631
startTime(kj::UNIX_EPOCH + reader.getStartTimeNs() * kj::NANOSECONDS),
16311632
endTime(kj::UNIX_EPOCH + reader.getEndTimeNs() * kj::NANOSECONDS) {
@@ -1638,8 +1639,7 @@ CompleteSpan::CompleteSpan(rpc::UserSpanData::Reader reader)
16381639
}
16391640

16401641
CompleteSpan CompleteSpan::clone() const {
1641-
CompleteSpan copy(
1642-
spanId, parentSpanId, kj::ConstString(kj::str(operationName)), startTime, endTime);
1642+
CompleteSpan copy(spanId, kj::ConstString(kj::str(operationName)), startTime, endTime);
16431643
copy.tags.reserve(tags.size());
16441644
for (auto& tag: tags) {
16451645
copy.tags.insert(kj::ConstString(kj::str(tag.key)), spanTagClone(tag.value));

src/workerd/io/trace.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -611,8 +611,10 @@ kj::String KJ_STRINGIFY(const CustomInfo& customInfo);
611611
struct CompleteSpan {
612612
// Represents a completed span within user tracing.
613613
tracing::SpanId spanId;
614-
tracing::SpanId parentSpanId;
615614

615+
// TODO(cleanup): operationName and startTime are not used in the spanClose event, but still used
616+
// for diagnostics and a fallback timestamp in the implementation. Get rid of them once that is no
617+
// longer needed.
616618
kj::ConstString operationName;
617619
kj::Date startTime;
618620
kj::Date endTime;
@@ -623,14 +625,12 @@ struct CompleteSpan {
623625
void copyTo(rpc::UserSpanData::Builder builder) const;
624626
CompleteSpan clone() const;
625627
explicit CompleteSpan(tracing::SpanId spanId,
626-
tracing::SpanId parentSpanId,
627628
kj::ConstString operationName,
628629
kj::Date startTime,
629630
kj::Date endTime,
630631
kj::HashMap<kj::ConstString, tracing::Attribute::Value> tags =
631632
kj::HashMap<kj::ConstString, tracing::Attribute::Value>())
632633
: spanId(spanId),
633-
parentSpanId(parentSpanId),
634634
operationName(kj::mv(operationName)),
635635
startTime(startTime),
636636
endTime(endTime),
@@ -1099,6 +1099,7 @@ class SpanObserver: public kj::Refcounted {
10991099
//
11001100
// This should always be called exactly once per observer.
11011101
virtual void report(const Span& span) = 0;
1102+
virtual void reportStart(kj::ConstString& operationName, kj::Date startTime) = 0;
11021103

11031104
// The current time to be provided for the span. For user tracing, we will override this to
11041105
// provide I/O time. This *requires* that spans are only created when an IOContext is available

src/workerd/io/tracer.c++

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -220,34 +220,44 @@ void WorkerTracer::addSpan(CompleteSpan&& span) {
220220
}
221221
}
222222

223-
// Span events are transmitted together for now.
223+
// Compose span events – attributes and spanClose are transmitted together for now.
224224
auto& topLevelContext = KJ_ASSERT_NONNULL(topLevelInvocationSpanContext);
225-
// Compose span events. For SpanOpen, an all-zero spanId is interpreted as having no spans above
226-
// this one, thus we use the Onset spanId instead (taken from topLevelContext). We go to great
227-
// lengths to rule out getting an all-zero spanId by chance (see SpanId::fromEntropy()), so this
228-
// should be safe.
229-
tracing::SpanId parentSpanId = span.parentSpanId;
230-
if (parentSpanId == tracing::SpanId::nullId) {
231-
parentSpanId = topLevelContext.getSpanId();
232-
}
233-
// TODO(o11y): Actually report the spanOpen event at span creation time
234-
auto spanOpenContext = tracing::InvocationSpanContext(
235-
topLevelContext.getTraceId(), topLevelContext.getInvocationId(), parentSpanId);
236225
auto spanComponentContext = tracing::InvocationSpanContext(
237226
topLevelContext.getTraceId(), topLevelContext.getInvocationId(), span.spanId);
238227

239-
tailStreamWriter->report(
240-
spanOpenContext, tracing::SpanOpen(span.spanId, kj::str(span.operationName)), span.startTime);
241228
// If a span manages to exceed the size limit, truncate it by not providing span attributes.
242229
if (span.tags.size() && messageSize <= MAX_TRACE_BYTES) {
243230
tracing::CustomInfo attr = KJ_MAP(tag, span.tags) {
244-
return tracing::Attribute(kj::ConstString(kj::str(tag.key)), spanTagClone(tag.value));
231+
return tracing::Attribute(kj::mv(tag.key), kj::mv(tag.value));
245232
};
246233
tailStreamWriter->report(spanComponentContext, kj::mv(attr), span.startTime);
247234
}
248235
tailStreamWriter->report(spanComponentContext, tracing::SpanClose(), span.endTime);
249236
}
250237

238+
void WorkerTracer::addSpanOpen(tracing::SpanId spanId,
239+
tracing::SpanId parentSpanId,
240+
kj::ConstString& operationName,
241+
kj::Date startTime) {
242+
// This is where we'll actually encode the span.
243+
if (pipelineLogLevel == PipelineLogLevel::NONE) {
244+
return;
245+
}
246+
247+
auto& tailStreamWriter = KJ_UNWRAP_OR_RETURN(maybeTailStreamWriter);
248+
auto& topLevelContext = KJ_ASSERT_NONNULL(topLevelInvocationSpanContext);
249+
// Compose SpanOpen. An all-zero spanId is interpreted as having no spans above this one, thus we
250+
// use the Onset spanId instead (taken from topLevelContext). We go to great lengths to rule out
251+
// getting an all-zero spanId by chance (see SpanId::fromEntropy()), so this should be safe.
252+
if (parentSpanId == tracing::SpanId::nullId) {
253+
parentSpanId = topLevelContext.getSpanId();
254+
}
255+
auto spanOpenContext = tracing::InvocationSpanContext(
256+
topLevelContext.getTraceId(), topLevelContext.getInvocationId(), parentSpanId);
257+
tailStreamWriter->report(
258+
spanOpenContext, tracing::SpanOpen(spanId, kj::str(operationName)), startTime);
259+
}
260+
251261
void WorkerTracer::addException(const tracing::InvocationSpanContext& context,
252262
kj::Date timestamp,
253263
kj::String name,

src/workerd/io/tracer.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,10 @@ class BaseTracer: public kj::Refcounted {
104104
kj::String message) = 0;
105105
// Add a span.
106106
virtual void addSpan(CompleteSpan&& span) = 0;
107+
virtual void addSpanOpen(tracing::SpanId spanId,
108+
tracing::SpanId parentSpanId,
109+
kj::ConstString& operationName,
110+
kj::Date startTime) = 0;
107111

108112
virtual void addException(const tracing::InvocationSpanContext& context,
109113
kj::Date timestamp,
@@ -182,6 +186,10 @@ class WorkerTracer final: public BaseTracer {
182186
LogLevel logLevel,
183187
kj::String message) override;
184188
void addSpan(CompleteSpan&& span) override;
189+
void addSpanOpen(tracing::SpanId spanId,
190+
tracing::SpanId parentSpanId,
191+
kj::ConstString& operationName,
192+
kj::Date startTime) override;
185193
void addException(const tracing::InvocationSpanContext& context,
186194
kj::Date timestamp,
187195
kj::String name,

src/workerd/server/server.c++

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1702,11 +1702,11 @@ class SpanSubmitter final: public kj::Refcounted {
17021702
SpanSubmitter(kj::Own<WorkerTracer> workerTracer)
17031703
: predictableSpanId(0),
17041704
workerTracer(kj::mv(workerTracer)) {}
1705-
void submitSpan(tracing::SpanId spanId, tracing::SpanId parentSpanId, const Span& span) {
1705+
void submitSpan(tracing::SpanId spanId, const Span& span) {
17061706
// We largely recreate the span here which feels inefficient, but is hard to avoid given the
17071707
// mismatch between the Span type and the full span information required for OTel.
1708-
CompleteSpan span2(spanId, parentSpanId, kj::ConstString(kj::str(span.operationName)),
1709-
span.startTime, span.endTime);
1708+
CompleteSpan span2(
1709+
spanId, kj::ConstString(kj::str(span.operationName)), span.startTime, span.endTime);
17101710
span2.tags.reserve(span.tags.size());
17111711
for (auto& tag: span.tags) {
17121712
span2.tags.insert(kj::ConstString(kj::str(tag.key)), spanTagClone(tag.value));
@@ -1717,6 +1717,15 @@ class SpanSubmitter final: public kj::Refcounted {
17171717

17181718
workerTracer->addSpan(kj::mv(span2));
17191719
}
1720+
void submitSpanStart(tracing::SpanId spanId,
1721+
tracing::SpanId parentSpanId,
1722+
kj::ConstString& operationName,
1723+
kj::Date startTime) {
1724+
if (isPredictableModeForTest()) {
1725+
startTime = kj::UNIX_EPOCH;
1726+
}
1727+
workerTracer->addSpanOpen(spanId, parentSpanId, operationName, startTime);
1728+
}
17201729

17211730
tracing::SpanId makeSpanId() {
17221731
return tracing::SpanId(predictableSpanId++);
@@ -1743,7 +1752,10 @@ class WorkerTracerSpanObserver: public SpanObserver {
17431752
}
17441753

17451754
void report(const Span& span) override {
1746-
spanSubmitter->submitSpan(spanId, parentSpanId, span);
1755+
spanSubmitter->submitSpan(spanId, span);
1756+
}
1757+
void reportStart(kj::ConstString& operationName, kj::Date startTime) override {
1758+
spanSubmitter->submitSpanStart(spanId, parentSpanId, operationName, startTime);
17471759
}
17481760

17491761
// Provide I/O time to the tracing system for user spans.

0 commit comments

Comments
 (0)