diff --git a/js/src/logger.test.ts b/js/src/logger.test.ts index 7123985ed..47e21c53e 100644 --- a/js/src/logger.test.ts +++ b/js/src/logger.test.ts @@ -17,6 +17,7 @@ import { Attachment, deepCopyEvent, renderMessage, + setFilteringFunction, } from "./logger"; import { parseTemplateFormat, @@ -1197,3 +1198,296 @@ describe("sensitive data redaction", () => { expect(copy.input).toBe(""); }); }); + +describe("filtering functionality", () => { + let memoryLogger: any; + + beforeEach(() => { + _exportsForTestingOnly.simulateLoginForTests(); + memoryLogger = _exportsForTestingOnly.useTestBackgroundLogger(); + }); + + afterEach(() => { + setFilteringFunction(null); + _exportsForTestingOnly.clearTestBackgroundLogger(); + }); + + /** + * Test that filtering a middle span creates an orphan, NOT a reparented child. + * + * Given this tree: + * root + * parent + * child + * sibling <-- FILTERED + * descendant + * + * The user might EXPECT descendant to be reparented: + * root + * parent + * child + * descendant <-- moved up to parent + * + * But ACTUAL behavior is descendant becomes orphan under root: + * root + * parent + * child + * descendant <-- orphan (span_parents still points to filtered "sibling") + * + * This is because: + * 1. descendant's span_parents still contains sibling's span_id + * 2. sibling doesn't exist in the logged data + * 3. UI treats spans with missing parents as orphans under root + */ + test("filtering middle span creates orphan under root", async () => { + const filteredSpanIds = new Set(); + + const filterSibling = (event: any): any | null => { + const spanId = event.span_id; + if (spanId && filteredSpanIds.has(spanId)) { + return null; + } + + const spanName = event.span_attributes?.name ?? 
""; + if (spanName === "sibling") { + if (spanId) { + filteredSpanIds.add(spanId); + } + return null; + } + + return event; + }; + + setFilteringFunction(filterSibling); + + const logger = initLogger({ + projectName: "test", + projectId: "test-project-id", + }); + + const root = logger.startSpan({ name: "root" }); + const rootSpanId = root.spanId; + + const parent = root.startSpan({ name: "parent" }); + const parentSpanId = parent.spanId; + + const child = parent.startSpan({ name: "child" }); + child.log({ input: "child data" }); + child.end(); + + const sibling = parent.startSpan({ name: "sibling" }); + const siblingSpanId = sibling.spanId; // This span will be filtered + + const descendant = sibling.startSpan({ name: "descendant" }); + descendant.log({ input: "descendant data" }); + const descendantSpanId = descendant.spanId; + descendant.end(); + + sibling.end(); + parent.end(); + root.end(); + + await memoryLogger.flush(); + const events = await memoryLogger.drain(); + + const loggedNames = events.map((e: any) => e.span_attributes?.name); + expect(loggedNames).toContain("root"); + expect(loggedNames).toContain("parent"); + expect(loggedNames).toContain("child"); + expect(loggedNames).not.toContain("sibling"); // Filtered out + expect(loggedNames).toContain("descendant"); // Still logged! 
+ + const descendantLog = events.find( + (e: any) => e.span_attributes?.name === "descendant", + ); + + // Key assertion: descendant's span_parents still points to the FILTERED sibling + // This means the UI will see it as an orphan (parent doesn't exist) + expect(descendantLog.span_parents).toContain(siblingSpanId); + + // The descendant does NOT have parent_span_id pointing to "parent" + // It still thinks its parent is "sibling" (which was filtered) + expect(descendantLog.span_parents).not.toContain(parentSpanId); + + // All spans share the same root_span_id + expect(descendantLog.root_span_id).toBe(rootSpanId); + }); + + /** + * Test that users can implement cascading filter to avoid orphans. + * + * To filter "sibling" AND its descendants (avoiding orphans), users must + * track filtered span_ids and check span_parents in their filter function. + */ + test("filtering with cascade to fix orphans", async () => { + const filteredSpanIds = new Set(); + + const filterSiblingWithCascade = (event: any): any | null => { + const spanId = event.span_id; + const spanParents = event.span_parents || []; + + // Check if any parent was filtered (cascade) + for (const parentId of spanParents) { + if (filteredSpanIds.has(parentId)) { + if (spanId) { + filteredSpanIds.add(spanId); + } + return null; + } + } + + // Check if this span should be filtered + const spanName = event.span_attributes?.name ?? 
""; + if (spanName === "sibling") { + if (spanId) { + filteredSpanIds.add(spanId); + } + return null; + } + + return event; + }; + + setFilteringFunction(filterSiblingWithCascade); + + const logger = initLogger({ + projectName: "test", + projectId: "test-project-id", + }); + + const root = logger.startSpan({ name: "root" }); + + const parent = root.startSpan({ name: "parent" }); + + const child = parent.startSpan({ name: "child" }); + child.log({ input: "child data" }); + child.end(); + + const sibling = parent.startSpan({ name: "sibling" }); + + const descendant = sibling.startSpan({ name: "descendant" }); + descendant.log({ input: "descendant data" }); + descendant.end(); + + sibling.end(); + parent.end(); + root.end(); + + await memoryLogger.flush(); + const events = await memoryLogger.drain(); + + const loggedNames = events.map((e: any) => e.span_attributes?.name); + expect(loggedNames).toContain("root"); + expect(loggedNames).toContain("parent"); + expect(loggedNames).toContain("child"); + expect(loggedNames).not.toContain("sibling"); // Filtered + expect(loggedNames).not.toContain("descendant"); // Also filtered (cascaded) + }); + + /** + * Test what happens when the ROOT span is filtered. + * + * Given: + * root <-- FILTERED + * parent + * child + * sibling + * descendant + * + * Result: All spans still logged, but parent becomes orphan. + * The internal hierarchy (parent->child, parent->sibling, sibling->descendant) is preserved. + * UI will create synthetic root and put parent under it as orphan. + */ + test("filtering root span - descendants preserve internal hierarchy but parent becomes orphan", async () => { + const filteredSpanIds = new Set(); + + const filterRoot = (event: any): any | null => { + const spanId = event.span_id; + if (spanId && filteredSpanIds.has(spanId)) { + return null; + } + + const spanName = event.span_attributes?.name ?? 
""; + if (spanName === "root") { + if (spanId) { + filteredSpanIds.add(spanId); + } + return null; + } + + return event; + }; + + setFilteringFunction(filterRoot); + + const logger = initLogger({ + projectName: "test", + projectId: "test-project-id", + }); + + const root = logger.startSpan({ name: "root" }); + const rootSpanId = root.spanId; + const rootRootSpanId = root.rootSpanId; + + const parent = root.startSpan({ name: "parent" }); + const parentSpanId = parent.spanId; + + const child = parent.startSpan({ name: "child" }); + child.log({ input: "child data" }); + child.end(); + + const sibling = parent.startSpan({ name: "sibling" }); + const siblingSpanId = sibling.spanId; + + const descendant = sibling.startSpan({ name: "descendant" }); + descendant.log({ input: "descendant data" }); + descendant.end(); + + sibling.end(); + parent.end(); + root.end(); + + await memoryLogger.flush(); + const events = await memoryLogger.drain(); + + // Verify root was filtered but all others are logged + const loggedNames = events.map((e: any) => e.span_attributes?.name); + expect(loggedNames).not.toContain("root"); // Filtered + expect(loggedNames).toContain("parent"); + expect(loggedNames).toContain("child"); + expect(loggedNames).toContain("sibling"); + expect(loggedNames).toContain("descendant"); + + // Get each span's log + const parentLog = events.find( + (e: any) => e.span_attributes?.name === "parent", + ); + const childLog = events.find( + (e: any) => e.span_attributes?.name === "child", + ); + const siblingLog = events.find( + (e: any) => e.span_attributes?.name === "sibling", + ); + const descendantLog = events.find( + (e: any) => e.span_attributes?.name === "descendant", + ); + + // All spans still have root_span_id pointing to the filtered root + expect(parentLog.root_span_id).toBe(rootRootSpanId); + expect(childLog.root_span_id).toBe(rootRootSpanId); + expect(siblingLog.root_span_id).toBe(rootRootSpanId); + expect(descendantLog.root_span_id).toBe(rootRootSpanId); + 
+ // Parent's span_parents points to filtered root (making it an orphan in UI) + expect(parentLog.span_parents).toContain(rootSpanId); + + // But the internal hierarchy is preserved: + // child and sibling are children of parent + expect(childLog.span_parents).toContain(parentSpanId); + expect(siblingLog.span_parents).toContain(parentSpanId); + + // descendant is child of sibling + expect(descendantLog.span_parents).toContain(siblingSpanId); + }); +}); diff --git a/js/src/logger.ts b/js/src/logger.ts index b325f2c61..1c9a0a9c0 100644 --- a/js/src/logger.ts +++ b/js/src/logger.ts @@ -760,6 +760,14 @@ export class BraintrustState { this.bgLogger().setMaskingFunction(maskingFunction); } + public setFilteringFunction( + filteringFunction: + | ((event: BackgroundLogEvent) => BackgroundLogEvent | null) + | null, + ): void { + this.bgLogger().setFilteringFunction(filteringFunction); + } + public async login(loginParams: LoginOptions & { forceLogin?: boolean }) { if (this.apiUrl && !loginParams.forceLogin) { return; @@ -2351,11 +2359,19 @@ interface BackgroundLogger { setMaskingFunction( maskingFunction: ((value: unknown) => unknown) | null, ): void; + setFilteringFunction( + filteringFunction: + | ((event: BackgroundLogEvent) => BackgroundLogEvent | null) + | null, + ): void; } export class TestBackgroundLogger implements BackgroundLogger { private items: LazyValue[][] = []; private maskingFunction: ((value: unknown) => unknown) | null = null; + private filteringFunction: + | ((event: BackgroundLogEvent) => BackgroundLogEvent | null) + | null = null; log(items: LazyValue[]): void { this.items.push(items); @@ -2367,6 +2383,14 @@ export class TestBackgroundLogger implements BackgroundLogger { this.maskingFunction = maskingFunction; } + setFilteringFunction( + filteringFunction: + | ((event: BackgroundLogEvent) => BackgroundLogEvent | null) + | null, + ): void { + this.filteringFunction = filteringFunction; + } + async flush(): Promise { return Promise.resolve(); } @@ 
-2375,11 +2399,29 @@ export class TestBackgroundLogger implements BackgroundLogger { const items = this.items; this.items = []; - // get all the values + // get all the values and apply filtering const events: BackgroundLogEvent[] = []; for (const item of items) { for (const event of item) { - events.push(await event.get()); + const eventData = await event.get(); + + // Apply filtering function + if (this.filteringFunction) { + try { + const filtered = this.filteringFunction(eventData); + if (filtered === null) { + // Event was filtered out, skip it + continue; + } + events.push(filtered); + } catch (e) { + // If filtering fails, log the original event + events.push(eventData); + } + } else if (eventData !== null) { + // Skip null events (filtered out by HTTPBackgroundLogger) + events.push(eventData); + } } } @@ -2443,6 +2485,9 @@ class HTTPBackgroundLogger implements BackgroundLogger { private activeFlushError: unknown = undefined; private onFlushError?: (error: unknown) => void; private maskingFunction: ((value: unknown) => unknown) | null = null; + private filteringFunction: + | ((event: BackgroundLogEvent) => BackgroundLogEvent | null) + | null = null; public syncFlush: boolean = false; // 6 MB for the AWS lambda gateway (from our own testing). 
@@ -2543,12 +2588,41 @@ class HTTPBackgroundLogger implements BackgroundLogger { this.maskingFunction = maskingFunction; } + setFilteringFunction( + filteringFunction: + | ((event: BackgroundLogEvent) => BackgroundLogEvent | null) + | null, + ): void { + this.filteringFunction = filteringFunction; + } + log(items: LazyValue[]) { if (this._disabled) { return; } - const droppedItems = this.queue.push(...items); + // Wrap items with filtering if a filtering function is set + let filteredItems = items; + if (this.filteringFunction) { + filteredItems = items.map((item) => { + return new LazyValue(async () => { + try { + const eventData = await item.get(); + const filtered = this.filteringFunction!(eventData); + if (filtered === null) { + // Event was filtered out - return a marker that will be removed later + return null as any; + } + return filtered; + } catch (e) { + // If filtering fails, return the original event + return await item.get(); + } + }); + }); + } + + const droppedItems = this.queue.push(...filteredItems); if (!this.syncFlush) { this.triggerActiveFlush(); @@ -2682,7 +2756,10 @@ class HTTPBackgroundLogger implements BackgroundLogger { ): Promise<[BackgroundLogEvent[][], Attachment[]]> { for (let i = 0; i < this.numTries; ++i) { try { - const items = await Promise.all(wrappedItems.map((x) => x.get())); + const allItems = await Promise.all(wrappedItems.map((x) => x.get())); + + // Filter out null events (filtered out by filtering function) + const items = allItems.filter((item) => item !== null); // TODO(kevin): `extractAttachments` should ideally come after // `mergeRowBatch`, since merge-overwriting could result in some @@ -3840,6 +3917,51 @@ export function setMaskingFunction( _globalState.setMaskingFunction(maskingFunction); } +/** + * Set a global filtering function to control which events are logged to Braintrust. + * + * The function receives a log event object and should return null to skip logging that event. 
+ * You can also return a modified event, but for redacting sensitive data, prefer using + * setMaskingFunction() instead. + * + * This is useful for: + * - Filtering out events by tags (e.g., LangGraph's 'langsmith:hidden' tag) + * - Skipping zero-duration spans or other overhead + * - Filtering events by span name, score thresholds, or other criteria + * + * Example: + * ```typescript + * function myFilter(event) { + * // Skip events with 'langsmith:hidden' tag + * const rootTags = event.tags || []; + * const metadataTags = event.metadata?.tags || []; + * if ([...rootTags, ...metadataTags].includes('langsmith:hidden')) { + * return null; + * } + * + * // Skip zero-duration spans (only when both timestamps are present on this event) + * const metrics = event.metrics || {}; + * if (metrics.start !== undefined && metrics.start === metrics.end) { + * return null; + * } + * + * return event; + * } + * + * setFilteringFunction(myFilter); + * ``` + * + * @param filteringFunction A function that takes a log event and returns null to skip + * logging, or the event to log it. Set to null to disable filtering. + */ +export function setFilteringFunction( + filteringFunction: + | ((event: BackgroundLogEvent) => BackgroundLogEvent | null) + | null, +): void { + _globalState.setFilteringFunction(filteringFunction); +} + /** * Log into Braintrust. This will prompt you for your API token, which you can find at * https://www.braintrust.dev/app/token. This method is called automatically by `init()`. diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py index 1c9795a50..255762d4b 100644 --- a/py/src/braintrust/logger.py +++ b/py/src/braintrust/logger.py @@ -392,6 +392,12 @@ def default_get_api_conn(): # different threads unintentionally use the same override. self._override_bg_logger = threading.local() + # Function for filtering/redacting log events before they are sent. 
+ # The function receives a log event dict and can: + # - Return the event (potentially modified) to log it + # - Return None to skip logging this event + self._filtering_function: Callable[[dict[str, Any]], dict[str, Any] | None] | None = None + self.reset_login_info() self._prompt_cache = PromptCache( @@ -589,6 +595,12 @@ def set_masking_function(self, masking_function: Callable[[Any], Any] | None) -> """Set the masking function on the background logger.""" self.global_bg_logger().set_masking_function(masking_function) + def set_filtering_function( + self, filtering_function: Callable[[dict[str, Any]], dict[str, Any] | None] | None + ) -> None: + """Set the filtering function on the background logger.""" + self.global_bg_logger().set_filtering_function(filtering_function) + _state: BraintrustState = None # type: ignore @@ -823,6 +835,7 @@ def __init__(self): self.lock = threading.Lock() self.logs = [] self.masking_function: Callable[[Any], Any] | None = None + self.filtering_function: Callable[[dict[str, Any]], dict[str, Any] | None] | None = None self.upload_attempts: list[BaseAttachment] = [] # Track upload attempts def enforce_queue_size_limit(self, enforce: bool) -> None: @@ -830,12 +843,28 @@ def enforce_queue_size_limit(self, enforce: bool) -> None: def log(self, *args: LazyValue[dict[str, Any]]) -> None: with self.lock: - self.logs.extend(args) + filtered_args = [] + for arg in args: + # Apply filtering function before adding to logs + if self.filtering_function: + filtered = _apply_filtering_function(arg, self) + if filtered is None: + # Event was filtered out, skip it + continue + arg = filtered + filtered_args.append(arg) + self.logs.extend(filtered_args) def set_masking_function(self, masking_function: Callable[[Any], Any] | None) -> None: """Set the masking function for the memory logger.""" self.masking_function = masking_function + def set_filtering_function( + self, filtering_function: Callable[[dict[str, Any]], dict[str, Any] | None] | None + ) -> 
None: + """Set the filtering function for the memory logger.""" + self.filtering_function = filtering_function + def flush(self, batch_size: int | None = None): """Flush the memory logger, extracting attachments and tracking upload attempts.""" with self.lock: @@ -905,6 +934,7 @@ class _HTTPBackgroundLogger: def __init__(self, api_conn: LazyValue[HTTPConnection]): self.api_conn = api_conn self.masking_function: Callable[[Any], Any] | None = None + self.filtering_function: Callable[[dict[str, Any]], dict[str, Any] | None] | None = None self.outfile = sys.stderr self.flush_lock = threading.RLock() @@ -971,6 +1001,14 @@ def log(self, *args: LazyValue[dict[str, Any]]) -> None: self._start() dropped_items = [] for event in args: + # Apply filtering function before adding to queue + if self.filtering_function: + filtered = _apply_filtering_function(event, self) + if filtered is None: + # Event was filtered out, skip it + continue + event = filtered + dropped = self.queue.put(event) dropped_items.extend(dropped) @@ -1909,6 +1947,39 @@ def login_to_state( return state +def _apply_filtering_function( + event: LazyValue[dict[str, Any]] | dict[str, Any], logger: _HTTPBackgroundLogger | _MemoryBackgroundLogger +) -> LazyValue[dict[str, Any]] | dict[str, Any] | None: + """ + Helper function to apply the filtering function to an event (lazy or eager). + Returns None if the event should be filtered out, otherwise returns the event. 
+ """ + if not logger.filtering_function: + return event + + try: + # Get the actual event data + if isinstance(event, LazyValue): + event_data = event.get() + else: + event_data = event + + # Apply the filtering function + filtered = logger.filtering_function(event_data) + + if filtered is None: + return None + + # Return in the same format as input + if isinstance(event, LazyValue): + return LazyValue(lambda: filtered, use_mutex=False) + else: + return filtered + except Exception: + # If filtering fails, log the original event + return event + + def set_masking_function(masking_function: Callable[[Any], Any] | None) -> None: """ Set a global masking function that will be applied to all logged data before sending to Braintrust. @@ -1920,6 +1991,44 @@ def set_masking_function(masking_function: Callable[[Any], Any] | None) -> None: _state.set_masking_function(masking_function) +def set_filtering_function( + filtering_function: Callable[[dict[str, Any]], dict[str, Any] | None] | None +) -> None: + """ + Set a global filtering function to control which events are logged to Braintrust. + + The function receives a log event dict and should return None to skip logging that event. + You can also return a modified event, but for redacting sensitive data, prefer using + set_masking_function() instead. 
+ + This is useful for: + - Filtering out events by tags (e.g., LangGraph's 'langsmith:hidden' tag) + - Skipping zero-duration spans or other overhead + - Filtering events by span name, score thresholds, or other criteria + + Example: + def my_filter(event): + # Skip events with 'langsmith:hidden' tag + root_tags = event.get('tags', []) or [] + metadata_tags = (event.get('metadata') or {}).get('tags') or [] + if 'langsmith:hidden' in root_tags + metadata_tags: + return None + + # Skip zero-duration spans (only when both timestamps are present on this event) + metrics = event.get('metrics') or {} + if metrics.get('start') is not None and metrics.get('start') == metrics.get('end'): + return None + + return event + + braintrust.set_filtering_function(my_filter) + + :param filtering_function: A callable that takes a log event dict and returns None to skip + logging, or the event to log it. Set to None to disable filtering. + """ + _state.set_filtering_function(filtering_function) + + def log(**event: Any) -> str: """ Log a single event to the current experiment. The event will be batched and uploaded behind the scenes. diff --git a/py/src/braintrust/test_logger.py b/py/src/braintrust/test_logger.py index a4ef97ab8..a1c361bbf 100644 --- a/py/src/braintrust/test_logger.py +++ b/py/src/braintrust/test_logger.py @@ -2187,6 +2187,267 @@ def broken_masking_function(data): braintrust.set_masking_function(None) +def test_filtering_middle_span_creates_orphan_under_root(with_memory_logger, with_simulate_login): + """ + Test that filtering a middle span creates an orphan, NOT a reparented child. + + Given this tree: + root + parent + child + sibling <-- FILTERED + descendant + + The user might EXPECT descendant to be reparented: + root + parent + child + descendant <-- moved up to parent + + But ACTUAL behavior is descendant becomes orphan under root: + root + parent + child + descendant <-- orphan (span_parents still points to filtered "sibling") + + This is because: + 1. descendant's span_parents still contains sibling's span_id + 2. 
sibling doesn't exist in the logged data + 3. UI treats spans with missing parents as orphans under root + """ + filtered_span_ids = set() + + def filter_sibling(event): + span_id = event.get("span_id") + if span_id and span_id in filtered_span_ids: + return None + + span_name = event.get("span_attributes", {}).get("name", "") + if span_name == "sibling": + if span_id: + filtered_span_ids.add(span_id) + return None + + return event + + braintrust.set_filtering_function(filter_sibling) + + test_logger = init_test_logger("test_project") + + with test_logger.start_span(name="root") as root: + root_span_id = root.span_id + + with root.start_span(name="parent") as parent: + parent_span_id = parent.span_id + + with parent.start_span(name="child") as child: + child.log(input="child data") + + with parent.start_span(name="sibling") as sibling: + sibling_span_id = sibling.span_id # This span will be filtered + + with sibling.start_span(name="descendant") as descendant: + descendant.log(input="descendant data") + descendant_span_id = descendant.span_id + + logs = with_memory_logger.pop() + + logged_names = [l.get("span_attributes", {}).get("name") for l in logs] + assert "root" in logged_names + assert "parent" in logged_names + assert "child" in logged_names + assert "sibling" not in logged_names # Filtered out + assert "descendant" in logged_names # Still logged! 
+ + descendant_log = next(l for l in logs if l.get("span_attributes", {}).get("name") == "descendant") + + # Key assertion: descendant's span_parents still points to the FILTERED sibling + # This means the UI will see it as an orphan (parent doesn't exist) + assert sibling_span_id in descendant_log["span_parents"] + + # The descendant does NOT have parent_span_id pointing to "parent" + # It still thinks its parent is "sibling" (which was filtered) + assert parent_span_id not in descendant_log["span_parents"] + + # All spans share the same root_span_id + assert descendant_log["root_span_id"] == root_span_id + + braintrust.set_filtering_function(None) + + +def test_filtering_with_cascade_to_fix_orphans(with_memory_logger, with_simulate_login): + """ + Test that users can implement cascading filter to avoid orphans. + + To filter "sibling" AND its descendants (avoiding orphans), users must + track filtered span_ids and check span_parents in their filter function. + + Given: + root + parent + child + sibling <-- FILTERED (and cascade to descendants) + descendant + + With cascading filter, result is: + root + parent + child + (descendant is also filtered, no orphans) + """ + filtered_span_ids = set() + + def filter_sibling_with_cascade(event): + span_id = event.get("span_id") + span_parents = event.get("span_parents", []) or [] + + # Check if any parent was filtered (cascade) + for parent_id in span_parents: + if parent_id in filtered_span_ids: + if span_id: + filtered_span_ids.add(span_id) + return None + + # Check if this span should be filtered + span_name = event.get("span_attributes", {}).get("name", "") + if span_name == "sibling": + if span_id: + filtered_span_ids.add(span_id) + return None + + return event + + braintrust.set_filtering_function(filter_sibling_with_cascade) + + test_logger = init_test_logger("test_project") + + with test_logger.start_span(name="root") as root: + with root.start_span(name="parent") as parent: + with parent.start_span(name="child") 
as child: + child.log(input="child data") + + with parent.start_span(name="sibling") as sibling: + with sibling.start_span(name="descendant") as descendant: + descendant.log(input="descendant data") + + logs = with_memory_logger.pop() + + logged_names = [l.get("span_attributes", {}).get("name") for l in logs] + assert "root" in logged_names + assert "parent" in logged_names + assert "child" in logged_names + assert "sibling" not in logged_names # Filtered + assert "descendant" not in logged_names # Also filtered (cascaded) + + braintrust.set_filtering_function(None) + + +def test_filtering_root_span_all_descendants_become_orphans(with_memory_logger, with_simulate_login): + """ + Test what happens when the ROOT span is filtered. + + Given this tree: + root <-- FILTERED + parent + child + sibling + descendant + + Question: Will descendants maintain their relative structure? + parent + child + sibling + descendant + + Answer: YES, except that parent itself becomes an orphan. In detail: + 1. All spans have root_span_id pointing to the filtered root + 2. parent has span_parents pointing to root (which doesn't exist) + 3. UI will create a synthetic root and put parent under it as orphan + 4. child/sibling are children of parent (which exists), so they're NOT orphans + 5. descendant is child of sibling (which exists), so it's NOT an orphan + + So the structure will be: + [synthetic root] + parent <-- orphan (its span_parents points to filtered root) + child + sibling + descendant + + The internal hierarchy is preserved, but parent becomes an orphan. 
+ """ + filtered_span_ids = set() + + def filter_root(event): + span_id = event.get("span_id") + if span_id and span_id in filtered_span_ids: + return None + + span_name = event.get("span_attributes", {}).get("name", "") + if span_name == "root": + if span_id: + filtered_span_ids.add(span_id) + return None + + return event + + braintrust.set_filtering_function(filter_root) + + test_logger = init_test_logger("test_project") + + with test_logger.start_span(name="root") as root: + root_span_id = root.span_id + root_root_span_id = root.root_span_id + + with root.start_span(name="parent") as parent: + parent_span_id = parent.span_id + + with parent.start_span(name="child") as child: + child.log(input="child data") + child_span_id = child.span_id + + with parent.start_span(name="sibling") as sibling: + sibling_span_id = sibling.span_id + + with sibling.start_span(name="descendant") as descendant: + descendant.log(input="descendant data") + descendant_span_id = descendant.span_id + + logs = with_memory_logger.pop() + + # Verify root was filtered but all others are logged + logged_names = [l.get("span_attributes", {}).get("name") for l in logs] + assert "root" not in logged_names # Filtered + assert "parent" in logged_names + assert "child" in logged_names + assert "sibling" in logged_names + assert "descendant" in logged_names + + # Get each span's log + parent_log = next(l for l in logs if l.get("span_attributes", {}).get("name") == "parent") + child_log = next(l for l in logs if l.get("span_attributes", {}).get("name") == "child") + sibling_log = next(l for l in logs if l.get("span_attributes", {}).get("name") == "sibling") + descendant_log = next(l for l in logs if l.get("span_attributes", {}).get("name") == "descendant") + + # All spans still have root_span_id pointing to the filtered root + assert parent_log["root_span_id"] == root_root_span_id + assert child_log["root_span_id"] == root_root_span_id + assert sibling_log["root_span_id"] == root_root_span_id + assert 
descendant_log["root_span_id"] == root_root_span_id + + # Parent's span_parents points to filtered root (making it an orphan in UI) + assert root_span_id in parent_log["span_parents"] + + # But the internal hierarchy is preserved: + # child and sibling are children of parent + assert parent_span_id in child_log["span_parents"] + assert parent_span_id in sibling_log["span_parents"] + + # descendant is child of sibling + assert sibling_span_id in descendant_log["span_parents"] + + braintrust.set_filtering_function(None) + + def test_attachment_unreadable_path_logs_warning(caplog): with caplog.at_level(logging.WARNING, logger="braintrust"): Attachment( @@ -3009,7 +3270,7 @@ def test_extract_attachments_collects_and_replaces(): event = { "input": {"file": attachment1}, "output": {"file": attachment2}, - "metadata": {"files": [attachment1, ext_attachment]} + "metadata": {"files": [attachment1, ext_attachment]}, } attachments = [] @@ -3039,7 +3300,7 @@ def test_extract_attachments_preserves_identity(): event = { "input": attachment, "output": attachment, # Same instance - "metadata": {"file": attachment} # Same instance again + "metadata": {"file": attachment}, # Same instance again } attachments = [] @@ -3078,10 +3339,7 @@ def test_multiple_attachments_upload_tracked(with_memory_logger, with_simulate_l logger = init_test_logger(__name__) span = logger.start_span(name="test_span") - span.log( - input={"file1": attachment1}, - output={"file2": attachment2} - ) + span.log(input={"file1": attachment1}, output={"file2": attachment2}) span.end() logger.flush() @@ -3111,9 +3369,7 @@ def test_same_attachment_logged_twice_tracked_twice(with_memory_logger, with_sim def test_external_attachment_upload_tracked(with_memory_logger, with_simulate_login): """Test that ExternalAttachment upload is also tracked.""" ext_attachment = ExternalAttachment( - url="s3://bucket/key.pdf", - filename="external.pdf", - content_type="application/pdf" + url="s3://bucket/key.pdf", 
filename="external.pdf", content_type="application/pdf" ) logger = init_test_logger(__name__) @@ -3151,11 +3407,7 @@ def test_multiple_attachment_types_tracked(with_memory_logger, with_simulate_log logger = init_test_logger(__name__) span = logger.start_span(name="test_span") - span.log( - input=attachment, - output=json_attachment, - metadata={"file": ext_attachment} - ) + span.log(input=attachment, output=json_attachment, metadata={"file": ext_attachment}) span.end() logger.flush()