From 04cb01772daead71f81122c2a4046e15ead6c4ce Mon Sep 17 00:00:00 2001
From: Olmo Maldonado <olmo@braintrust.dev>
Date: Thu, 29 Jan 2026 10:59:40 -0800
Subject: [PATCH 1/3] add set filtering function

Add a general-purpose filtering hook that lets customers decide if and when a span is sent. It makes no assumptions about root/child relationships, etc.
---
 js/src/logger.test.ts            | 157 ++++++++++++++++++++++
 js/src/logger.ts                 | 130 +++++++++++++++++-
 py/src/braintrust/logger.py      | 111 +++++++++++++++-
 py/src/braintrust/test_logger.py | 218 +++++++++++++++++++++++++++++++
 4 files changed, 611 insertions(+), 5 deletions(-)
diff --git a/js/src/logger.test.ts b/js/src/logger.test.ts
index 7123985ed..009d77f8c 100644
--- a/js/src/logger.test.ts
+++ b/js/src/logger.test.ts
@@ -17,6 +17,7 @@ import {
   Attachment,
   deepCopyEvent,
   renderMessage,
+  setFilteringFunction,
 } from "./logger";
 import {
   parseTemplateFormat,
@@ -1197,3 +1198,159 @@ describe("sensitive data redaction", () => {
     expect(copy.input).toBe("<span>");
   });
 });
+
+describe("filtering functionality", () => {
+  let memoryLogger: any;
+
+  beforeEach(() => {
+    _exportsForTestingOnly.simulateLoginForTests();
+    memoryLogger = _exportsForTestingOnly.useTestBackgroundLogger();
+  });
+
+  afterEach(() => {
+    setFilteringFunction(null);
+    _exportsForTestingOnly.clearTestBackgroundLogger();
+  });
+
+  test("filter out events with langsmith:hidden tag", async () => {
+    const filteredSpanIds = new Set<string>();
+
+    const filteringFunction = (event: any): any | null => {
+      const spanId = event.span_id;
+      if (spanId && filteredSpanIds.has(spanId)) {
+        return null;
+      }
+
+      const rootTags = event.tags || [];
+      const metadataTags = event.metadata?.tags || [];
+      const allTags = [...rootTags, ...metadataTags];
+
+      if (allTags.includes("langsmith:hidden")) {
+        if (spanId) {
+          filteredSpanIds.add(spanId);
+        }
+        return null;
+      }
+
+      return event;
+    };
+
+    setFilteringFunction(filteringFunction);
+
+    const logger = initLogger({
+      projectName: "test",
+      projectId: "test-project-id",
+    });
+
+    logger.log({
+      input: "Hidden input",
+      output: "Hidden output",
+      tags: ["langsmith:hidden"],
+    });
+
+    logger.log({
+      input: "Normal input",
+      output: "Normal output",
+      tags: ["normal"],
+    });
+
+    await memoryLogger.flush();
+    const events = await memoryLogger.drain();
+
+    expect(events).toHaveLength(1);
+    expect(events[0].input).toBe("Normal input");
+    expect(events[0].output).toBe("Normal output");
+  });
+
+  test("filter by score threshold", async () => {
+    const filteredSpanIds = new Set<string>();
+
+    const filteringFunction = (event: any): any | null => {
+      const spanId = event.span_id;
+      if (spanId && filteredSpanIds.has(spanId)) {
+        return null;
+      }
+
+      if (event.scores?.accuracy !== undefined && event.scores.accuracy < 0.5) {
+        if (spanId) {
+          filteredSpanIds.add(spanId);
+        }
+        return null;
+      }
+      return event;
+    };
+
+    setFilteringFunction(filteringFunction);
+
+    const experiment = _exportsForTestingOnly.initTestExperiment({
+      projectName: "test",
+      experimentName: "test-exp",
+    });
+
+    experiment.log({
+      input: "bad input",
+      output: "bad output",
+      scores: { accuracy: 0.3 },
+    });
+
+    experiment.log({
+      input: "good input",
+      output: "good output",
+      scores: { accuracy: 0.8 },
+    });
+
+    await memoryLogger.flush();
+    const events = await memoryLogger.drain();
+
+    expect(events).toHaveLength(1);
+    expect(events[0].input).toBe("good input");
+    expect(events[0].scores.accuracy).toBe(0.8);
+  });
+
+  test("filter nested spans by name", async () => {
+    const filteredSpanIds = new Set<string>();
+
+    const filteringFunction = (event: any): any | null => {
+      const spanId = event.span_id;
+      if (spanId && filteredSpanIds.has(spanId)) {
+        return null;
+      }
+
+      if (event.span_attributes?.name === "internal_helper") {
+        if (spanId) {
+          filteredSpanIds.add(spanId);
+        }
+        return null;
+      }
+
+      return event;
+    };
+
+    setFilteringFunction(filteringFunction);
+
+    const logger = initLogger({
+      projectName: "test",
+      projectId: "test-project-id",
+    });
+    const parent = logger.startSpan({ name: "parent_span" });
+    parent.log({ input: "parent input", output: "parent output" });
+
+    const internalChild = parent.startSpan({ name: "internal_helper" });
+    internalChild.log({ input: "hidden input", output: "hidden output" });
+    internalChild.end();
+
+    const publicChild = parent.startSpan({ name: "public_helper" });
+    publicChild.log({ input: "public input", output: "public output" });
+    publicChild.end();
+
+    parent.end();
+
+    await memoryLogger.flush();
+    const events = await memoryLogger.drain();
+
+    const inputEvents = events.filter((e: any) => e.input !== undefined);
+    expect(inputEvents).toHaveLength(2);
+    expect(inputEvents[0].input).toBe("parent input");
+    expect(inputEvents[1].input).toBe("public input");
+  });
+});
diff --git a/js/src/logger.ts b/js/src/logger.ts
index b325f2c61..1c9a0a9c0 100644
--- a/js/src/logger.ts
+++ b/js/src/logger.ts
@@ -760,6 +760,14 @@ export class BraintrustState {
     this.bgLogger().setMaskingFunction(maskingFunction);
   }
 
+  public setFilteringFunction(
+    filteringFunction:
+      | ((event: BackgroundLogEvent) => BackgroundLogEvent | null)
+      | null,
+  ): void {
+    this.bgLogger().setFilteringFunction(filteringFunction);
+  }
+
   public async login(loginParams: LoginOptions & { forceLogin?: boolean }) {
     if (this.apiUrl && !loginParams.forceLogin) {
       return;
@@ -2351,11 +2359,19 @@ interface BackgroundLogger {
   setMaskingFunction(
     maskingFunction: ((value: unknown) => unknown) | null,
   ): void;
+  setFilteringFunction(
+    filteringFunction:
+      | ((event: BackgroundLogEvent) => BackgroundLogEvent | null)
+      | null,
+  ): void;
 }
 
 export class TestBackgroundLogger implements BackgroundLogger {
   private items: LazyValue<BackgroundLogEvent>[][] = [];
   private maskingFunction: ((value: unknown) => unknown) | null = null;
+  private filteringFunction:
+    | ((event: BackgroundLogEvent) => BackgroundLogEvent | null)
+    | null = null;
 
   log(items: LazyValue<BackgroundLogEvent>[]): void {
     this.items.push(items);
@@ -2367,6 +2383,14 @@ export class TestBackgroundLogger implements BackgroundLogger {
     this.maskingFunction = maskingFunction;
   }
 
+  setFilteringFunction(
+    filteringFunction:
+      | ((event: BackgroundLogEvent) => BackgroundLogEvent | null)
+      | null,
+  ): void {
+    this.filteringFunction = filteringFunction;
+  }
+
   async flush(): Promise<void> {
     return Promise.resolve();
   }
@@ -2375,11 +2399,29 @@ export class TestBackgroundLogger implements BackgroundLogger {
     const items = this.items;
     this.items = [];
 
-    // get all the values
+    // get all the values and apply filtering
     const events: BackgroundLogEvent[] = [];
     for (const item of items) {
       for (const event of item) {
-        events.push(await event.get());
+        const eventData = await event.get();
+
+        // Apply filtering function
+        if (this.filteringFunction) {
+          try {
+            const filtered = this.filteringFunction(eventData);
+            if (filtered === null) {
+              // Event was filtered out, skip it
+              continue;
+            }
+            events.push(filtered);
+          } catch (e) {
+            // If filtering fails, log the original event
+            events.push(eventData);
+          }
+        } else if (eventData !== null) {
+          // Skip null events (filtered out by HTTPBackgroundLogger)
+          events.push(eventData);
+        }
       }
     }
 
@@ -2443,6 +2485,9 @@ class HTTPBackgroundLogger implements BackgroundLogger {
   private activeFlushError: unknown = undefined;
   private onFlushError?: (error: unknown) => void;
   private maskingFunction: ((value: unknown) => unknown) | null = null;
+  private filteringFunction:
+    | ((event: BackgroundLogEvent) => BackgroundLogEvent | null)
+    | null = null;
 
   public syncFlush: boolean = false;
   // 6 MB for the AWS lambda gateway (from our own testing).
@@ -2543,12 +2588,41 @@ class HTTPBackgroundLogger implements BackgroundLogger {
     this.maskingFunction = maskingFunction;
   }
 
+  setFilteringFunction(
+    filteringFunction:
+      | ((event: BackgroundLogEvent) => BackgroundLogEvent | null)
+      | null,
+  ): void {
+    this.filteringFunction = filteringFunction;
+  }
+
   log(items: LazyValue<BackgroundLogEvent>[]) {
     if (this._disabled) {
       return;
     }
 
-    const droppedItems = this.queue.push(...items);
+    // Wrap items with filtering if a filtering function is set
+    let filteredItems = items;
+    if (this.filteringFunction) {
+      filteredItems = items.map((item) => {
+        return new LazyValue(async () => {
+          try {
+            const eventData = await item.get();
+            const filtered = this.filteringFunction!(eventData);
+            if (filtered === null) {
+              // Event was filtered out - return a marker that will be removed later
+              return null as any;
+            }
+            return filtered;
+          } catch (e) {
+            // If filtering fails, return the original event
+            return await item.get();
+          }
+        });
+      });
+    }
+
+    const droppedItems = this.queue.push(...filteredItems);
 
     if (!this.syncFlush) {
       this.triggerActiveFlush();
@@ -2682,7 +2756,10 @@ class HTTPBackgroundLogger implements BackgroundLogger {
   ): Promise<[BackgroundLogEvent[][], Attachment[]]> {
     for (let i = 0; i < this.numTries; ++i) {
       try {
-        const items = await Promise.all(wrappedItems.map((x) => x.get()));
+        const allItems = await Promise.all(wrappedItems.map((x) => x.get()));
+
+        // Filter out null events (filtered out by filtering function)
+        const items = allItems.filter((item) => item !== null);
 
         // TODO(kevin): `extractAttachments` should ideally come after
         // `mergeRowBatch`, since merge-overwriting could result in some
@@ -3840,6 +3917,51 @@ export function setMaskingFunction(
   _globalState.setMaskingFunction(maskingFunction);
 }
 
+/**
+ * Set a global filtering function to control which events are logged to Braintrust.
+ *
+ * The function receives a log event object and should return null to skip logging that event,
+ * or the (optionally modified) event to log it. For redacting sensitive data, prefer
+ * setMaskingFunction(). If the function throws, the original event is logged unfiltered.
+ *
+ * This is useful for:
+ * - Filtering out events by tags (e.g., LangGraph's 'langsmith:hidden' tag)
+ * - Skipping zero-duration spans or other overhead
+ * - Filtering events by span name, score thresholds, or other criteria
+ *
+ * Example:
+ * ```typescript
+ * function myFilter(event) {
+ *   // Skip events with 'langsmith:hidden' tag
+ *   const rootTags = event.tags || [];
+ *   const metadataTags = event.metadata?.tags || [];
+ *   if ([...rootTags, ...metadataTags].includes('langsmith:hidden')) {
+ *     return null;
+ *   }
+ *
+ *   // Skip zero-duration spans (events without timestamps are kept)
+ *   const metrics = event.metrics || {};
+ *   if (metrics.start != null && metrics.start === metrics.end) {
+ *     return null;
+ *   }
+ *
+ *   return event;
+ * }
+ *
+ * setFilteringFunction(myFilter);
+ * ```
+ *
+ * @param filteringFunction A function that takes a log event and returns null to skip
+ *                         logging, or the event to log it. Set to null to disable filtering.
+ */
+export function setFilteringFunction(
+  filteringFunction:
+    | ((event: BackgroundLogEvent) => BackgroundLogEvent | null)
+    | null,
+): void {
+  _globalState.setFilteringFunction(filteringFunction);
+}
+
 /**
  * Log into Braintrust. This will prompt you for your API token, which you can find at
  * https://www.braintrust.dev/app/token. This method is called automatically by `init()`.
diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py
index 1c9795a50..255762d4b 100644
--- a/py/src/braintrust/logger.py
+++ b/py/src/braintrust/logger.py
@@ -392,6 +392,12 @@ def default_get_api_conn():
         # different threads unintentionally use the same override.
         self._override_bg_logger = threading.local()
 
+        # Function for filtering/redacting log events before they are sent.
+        # The function receives a log event dict and can:
+        # - Return the event (potentially modified) to log it
+        # - Return None to skip logging this event
+        self._filtering_function: Callable[[dict[str, Any]], dict[str, Any] | None] | None = None
+
         self.reset_login_info()
 
         self._prompt_cache = PromptCache(
@@ -589,6 +595,12 @@ def set_masking_function(self, masking_function: Callable[[Any], Any] | None) ->
         """Set the masking function on the background logger."""
         self.global_bg_logger().set_masking_function(masking_function)
 
+    def set_filtering_function(
+        self, filtering_function: Callable[[dict[str, Any]], dict[str, Any] | None] | None
+    ) -> None:
+        """Set the filtering function on the background logger."""
+        self.global_bg_logger().set_filtering_function(filtering_function)
+
 
 _state: BraintrustState = None  # type: ignore
 
@@ -823,6 +835,7 @@ def __init__(self):
         self.lock = threading.Lock()
         self.logs = []
         self.masking_function: Callable[[Any], Any] | None = None
+        self.filtering_function: Callable[[dict[str, Any]], dict[str, Any] | None] | None = None
         self.upload_attempts: list[BaseAttachment] = []  # Track upload attempts
 
     def enforce_queue_size_limit(self, enforce: bool) -> None:
@@ -830,12 +843,28 @@ def enforce_queue_size_limit(self, enforce: bool) -> None:
 
     def log(self, *args: LazyValue[dict[str, Any]]) -> None:
         with self.lock:
-            self.logs.extend(args)
+            filtered_args = []
+            for arg in args:
+                # Apply filtering function before adding to logs
+                if self.filtering_function:
+                    filtered = _apply_filtering_function(arg, self)
+                    if filtered is None:
+                        # Event was filtered out, skip it
+                        continue
+                    arg = filtered
+                filtered_args.append(arg)
+            self.logs.extend(filtered_args)
 
     def set_masking_function(self, masking_function: Callable[[Any], Any] | None) -> None:
         """Set the masking function for the memory logger."""
         self.masking_function = masking_function
 
+    def set_filtering_function(
+        self, filtering_function: Callable[[dict[str, Any]], dict[str, Any] | None] | None
+    ) -> None:
+        """Set the filtering function for the memory logger."""
+        self.filtering_function = filtering_function
+
     def flush(self, batch_size: int | None = None):
         """Flush the memory logger, extracting attachments and tracking upload attempts."""
         with self.lock:
@@ -905,6 +934,7 @@ class _HTTPBackgroundLogger:
     def __init__(self, api_conn: LazyValue[HTTPConnection]):
         self.api_conn = api_conn
         self.masking_function: Callable[[Any], Any] | None = None
+        self.filtering_function: Callable[[dict[str, Any]], dict[str, Any] | None] | None = None
         self.outfile = sys.stderr
         self.flush_lock = threading.RLock()
 
@@ -971,6 +1001,14 @@ def log(self, *args: LazyValue[dict[str, Any]]) -> None:
         self._start()
         dropped_items = []
         for event in args:
+            # Apply filtering function before adding to queue
+            if self.filtering_function:
+                filtered = _apply_filtering_function(event, self)
+                if filtered is None:
+                    # Event was filtered out, skip it
+                    continue
+                event = filtered
+
             dropped = self.queue.put(event)
             dropped_items.extend(dropped)
 
@@ -1909,6 +1947,39 @@ def login_to_state(
     return state
 
 
+def _apply_filtering_function(
+    event: LazyValue[dict[str, Any]] | dict[str, Any], logger: _HTTPBackgroundLogger | _MemoryBackgroundLogger
+) -> LazyValue[dict[str, Any]] | dict[str, Any] | None:
+    """
+    Helper function to apply the filtering function to an event (lazy or eager).
+    Returns None if the event should be filtered out, otherwise returns the event.
+    """
+    if not logger.filtering_function:
+        return event
+
+    try:
+        # Get the actual event data
+        if isinstance(event, LazyValue):
+            event_data = event.get()
+        else:
+            event_data = event
+
+        # Apply the filtering function
+        filtered = logger.filtering_function(event_data)
+
+        if filtered is None:
+            return None
+
+        # Return in the same format as input
+        if isinstance(event, LazyValue):
+            return LazyValue(lambda: filtered, use_mutex=False)
+        else:
+            return filtered
+    except Exception:
+        # If filtering fails, log the original event
+        return event
+
+
 def set_masking_function(masking_function: Callable[[Any], Any] | None) -> None:
     """
     Set a global masking function that will be applied to all logged data before sending to Braintrust.
@@ -1920,6 +1991,44 @@ def set_masking_function(masking_function: Callable[[Any], Any] | None) -> None:
     _state.set_masking_function(masking_function)
 
 
+def set_filtering_function(
+    filtering_function: Callable[[dict[str, Any]], dict[str, Any] | None] | None
+) -> None:
+    """
+    Set a global filtering function to control which events are logged to Braintrust.
+
+    The function receives a log event dict and should return None to skip logging that event,
+    or the (optionally modified) event to log it. For redacting sensitive data, prefer
+    set_masking_function(). If the function raises, the original event is logged unfiltered.
+
+    This is useful for:
+    - Filtering out events by tags (e.g., LangGraph's 'langsmith:hidden' tag)
+    - Skipping zero-duration spans or other overhead
+    - Filtering events by span name, score thresholds, or other criteria
+
+    Example:
+        def my_filter(event):
+            # Skip events with 'langsmith:hidden' tag
+            root_tags = event.get('tags') or []
+            metadata_tags = (event.get('metadata') or {}).get('tags') or []
+            if 'langsmith:hidden' in list(root_tags) + list(metadata_tags):
+                return None
+
+            # Skip zero-duration spans (events without timestamps are kept)
+            metrics = event.get('metrics') or {}
+            if metrics.get('start') is not None and metrics.get('start') == metrics.get('end'):
+                return None
+
+            return event
+
+        braintrust.set_filtering_function(my_filter)
+
+    :param filtering_function: A callable that takes a log event dict and returns None to skip
+                              logging, or the event to log it. Set to None to disable filtering.
+    """
+    _state.set_filtering_function(filtering_function)
+
+
 def log(**event: Any) -> str:
     """
     Log a single event to the current experiment. The event will be batched and uploaded behind the scenes.
diff --git a/py/src/braintrust/test_logger.py b/py/src/braintrust/test_logger.py
index a4ef97ab8..42f1e09d0 100644
--- a/py/src/braintrust/test_logger.py
+++ b/py/src/braintrust/test_logger.py
@@ -2187,6 +2187,224 @@ def broken_masking_function(data):
     braintrust.set_masking_function(None)
 
 
+def test_filtering_function_logger(with_memory_logger, with_simulate_login):
+    """Test that filtering function can filter and redact events in Logger."""
+
+    # Track span IDs that should be filtered
+    filtered_span_ids = set()
+
+    def filtering_function(event):
+        """
+        Filter out events with 'langsmith:hidden' tag.
+        Redact sensitive metadata fields.
+        """
+        # Skip events from filtered spans
+        span_id = event.get("span_id")
+        if span_id and span_id in filtered_span_ids:
+            return None
+
+        # Skip events with langsmith:hidden tag
+        # Tags can be in two places depending on where in the processing we are:
+        # 1. At root level before processing (event["tags"])
+        # 2. In metadata after processing (event["metadata"]["tags"])
+        root_tags = event.get("tags", []) or []
+        metadata_tags = event.get("metadata", {}).get("tags", []) or []
+        all_tags = list(root_tags) + list(metadata_tags)
+
+        if "langsmith:hidden" in all_tags:
+            # Track this span ID so we filter all its events
+            if span_id:
+                filtered_span_ids.add(span_id)
+            return None
+
+        # Redact api_key from metadata
+        if "metadata" in event and "api_key" in event.get("metadata", {}):
+            event = event.copy()
+            event["metadata"] = event["metadata"].copy()
+            event["metadata"]["api_key"] = "***REDACTED***"
+
+        return event
+
+    # Set filtering function globally
+    braintrust.set_filtering_function(filtering_function)
+
+    # Create test logger
+    test_logger = init_test_logger("test_project")
+
+    # Log event that should be filtered out
+    test_logger.log(
+        input="Hidden input",
+        output="Hidden output",
+        tags=["langsmith:hidden"],
+    )
+
+    # Log event that should be redacted
+    test_logger.log(
+        input="Normal input",
+        output="Normal output",
+        metadata={"api_key": "secret123", "user": "testuser"},
+    )
+
+    # Log normal event
+    test_logger.log(
+        input="Another input",
+        output="Another output",
+        tags=["normal"],
+    )
+
+    # Check the logged data
+    logs = with_memory_logger.pop()
+
+    # Should only have 2 logs (the first one was filtered out)
+    assert len(logs) == 2
+
+    # First log should have redacted api_key
+    log1 = logs[0]
+    assert log1["input"] == "Normal input"
+    assert log1["output"] == "Normal output"
+    assert log1["metadata"]["api_key"] == "***REDACTED***"
+    assert log1["metadata"]["user"] == "testuser"
+
+    # Second log should be unchanged
+    log2 = logs[1]
+    assert log2["input"] == "Another input"
+    assert log2["output"] == "Another output"
+    assert log2["tags"] == ["normal"]
+
+    # Clean up
+    braintrust.set_filtering_function(None)
+
+
+def test_filtering_function_experiment(with_memory_logger, with_simulate_login):
+    """Test that filtering function works with Experiment spans."""
+
+    # Track filtered span IDs so we can filter all events from that span
+    filtered_span_ids = set()
+
+    def filtering_function(event):
+        """Filter out spans with score below threshold."""
+        span_id = event.get("span_id")
+
+        # Skip events from already-filtered spans
+        if span_id and span_id in filtered_span_ids:
+            return None
+
+        # Skip spans with low scores
+        scores = event.get("scores", {})
+        if "accuracy" in scores and scores["accuracy"] < 0.5:
+            if span_id:
+                filtered_span_ids.add(span_id)
+            return None
+        return event
+
+    # Set filtering function globally
+    braintrust.set_filtering_function(filtering_function)
+
+    # Create test experiment
+    experiment = init_test_exp("test_project", "test_experiment")
+
+    # Log event with low score (should be filtered out)
+    experiment.log(
+        input="bad input",
+        output="bad output",
+        scores={"accuracy": 0.3},
+    )
+
+    # Log event with good score
+    experiment.log(
+        input="good input",
+        output="good output",
+        scores={"accuracy": 0.8},
+    )
+
+    # Log event without accuracy score (but with a different score)
+    experiment.log(
+        input="neutral input",
+        output="neutral output",
+        scores={"quality": 0.9},
+    )
+
+    experiment.flush()
+
+    # Check the logged data
+    logs = with_memory_logger.pop()
+
+    # Should only have 2 logs (the first one with low score was filtered out)
+    assert len(logs) == 2
+
+    log1 = logs[0]
+    assert log1["input"] == "good input"
+    assert log1["scores"]["accuracy"] == 0.8
+
+    log2 = logs[1]
+    assert log2["input"] == "neutral input"
+    assert log2["scores"]["quality"] == 0.9
+
+    # Clean up
+    braintrust.set_filtering_function(None)
+
+
+def test_filtering_function_with_child_spans(with_memory_logger, with_simulate_login):
+    """Test that filtering function works with nested spans."""
+
+    # Track filtered span IDs
+    filtered_span_ids = set()
+
+    def filtering_function(event):
+        """Filter out spans with specific names."""
+        span_id = event.get("span_id")
+
+        # Skip events from already-filtered spans
+        if span_id and span_id in filtered_span_ids:
+            return None
+
+        # Filter out spans named "internal_helper"
+        span_attrs = event.get("span_attributes", {})
+        if span_attrs.get("name") == "internal_helper":
+            if span_id:
+                filtered_span_ids.add(span_id)
+            return None
+
+        return event
+
+    # Set filtering function globally
+    braintrust.set_filtering_function(filtering_function)
+
+    # Create test logger
+    test_logger = init_test_logger("test_project")
+
+    # Create a parent span
+    with test_logger.start_span(name="parent_span") as parent:
+        # Log to parent (should be kept)
+        parent.log(input="parent input", output="result1")
+
+        # Create a child span that should be filtered out
+        with parent.start_span(name="internal_helper") as child:
+            child.log(input="hidden input", output="hidden output")
+
+        # Create another child span that should be kept
+        with parent.start_span(name="public_helper") as child2:
+            child2.log(input="public input", output="result2")
+
+    # Check the logged data
+    logs = with_memory_logger.pop()
+
+    # Should have events from parent_span and public_helper, but not internal_helper
+    # span.log() on existing spans creates merge events, so we get 2 events
+    assert len(logs) == 2
+
+    # First should be from parent_span
+    assert logs[0]["input"] == "parent input"
+    assert logs[0]["output"] == "result1"
+
+    # Second should be from public_helper
+    assert logs[1]["input"] == "public input"
+    assert logs[1]["output"] == "result2"
+
+    # Clean up
+    braintrust.set_filtering_function(None)
+
+
 def test_attachment_unreadable_path_logs_warning(caplog):
     with caplog.at_level(logging.WARNING, logger="braintrust"):
         Attachment(

From 5d8f4311e21af6f50b128df8e65905711df9f3cb Mon Sep 17 00:00:00 2001
From: Olmo Maldonado <olmo@braintrust.dev>
Date: Fri, 30 Jan 2026 15:44:20 -0800
Subject: [PATCH 2/3] add root -> parent -> child + sibling (filtered)->
 descendant test

---
 js/src/logger.test.ts            | 187 ++++++++++++--------
 py/src/braintrust/test_logger.py | 294 ++++++++++++-------------------
 2 files changed, 220 insertions(+), 261 deletions(-)

diff --git a/js/src/logger.test.ts b/js/src/logger.test.ts
index 009d77f8c..c24cbbbeb 100644
--- a/js/src/logger.test.ts
+++ b/js/src/logger.test.ts
@@ -1212,20 +1212,44 @@ describe("filtering functionality", () => {
     _exportsForTestingOnly.clearTestBackgroundLogger();
   });
 
-  test("filter out events with langsmith:hidden tag", async () => {
+  /**
+   * Test that filtering a middle span creates an orphan, NOT a reparented child.
+   *
+   * Given this tree:
+   *     root
+   *       parent
+   *         child
+   *         sibling  <-- FILTERED
+   *           descendant
+   *
+   * The user might EXPECT descendant to be reparented:
+   *     root
+   *       parent
+   *         child
+   *         descendant  <-- moved up to parent
+   *
+   * But ACTUAL behavior is descendant becomes orphan under root:
+   *     root
+   *       parent
+   *         child
+   *       descendant  <-- orphan (span_parents still points to filtered "sibling")
+   *
+   * This is because:
+   * 1. descendant's span_parents still contains sibling's span_id
+   * 2. sibling doesn't exist in the logged data
+   * 3. UI treats spans with missing parents as orphans under root
+   */
+  test("filtering middle span creates orphan under root", async () => {
     const filteredSpanIds = new Set<string>();
 
-    const filteringFunction = (event: any): any | null => {
+    const filterSibling = (event: any): any | null => {
       const spanId = event.span_id;
       if (spanId && filteredSpanIds.has(spanId)) {
         return null;
       }
 
-      const rootTags = event.tags || [];
-      const metadataTags = event.metadata?.tags || [];
-      const allTags = [...rootTags, ...metadataTags];
-
-      if (allTags.includes("langsmith:hidden")) {
+      const spanName = event.span_attributes?.name ?? "";
+      if (spanName === "sibling") {
         if (spanId) {
           filteredSpanIds.add(spanId);
         }
@@ -1235,88 +1259,87 @@ describe("filtering functionality", () => {
       return event;
     };
 
-    setFilteringFunction(filteringFunction);
+    setFilteringFunction(filterSibling);
 
     const logger = initLogger({
       projectName: "test",
       projectId: "test-project-id",
     });
 
-    logger.log({
-      input: "Hidden input",
-      output: "Hidden output",
-      tags: ["langsmith:hidden"],
-    });
+    const root = logger.startSpan({ name: "root" });
+    const rootSpanId = root.spanId;
 
-    logger.log({
-      input: "Normal input",
-      output: "Normal output",
-      tags: ["normal"],
-    });
+    const parent = root.startSpan({ name: "parent" });
+    const parentSpanId = parent.spanId;
 
-    await memoryLogger.flush();
-    const events = await memoryLogger.drain();
+    const child = parent.startSpan({ name: "child" });
+    child.log({ input: "child data" });
+    child.end();
 
-    expect(events).toHaveLength(1);
-    expect(events[0].input).toBe("Normal input");
-    expect(events[0].output).toBe("Normal output");
-  });
-
-  test("filter by score threshold", async () => {
-    const filteredSpanIds = new Set<string>();
+    const sibling = parent.startSpan({ name: "sibling" });
+    const siblingSpanId = sibling.spanId; // This span will be filtered
 
-    const filteringFunction = (event: any): any | null => {
-      const spanId = event.span_id;
-      if (spanId && filteredSpanIds.has(spanId)) {
-        return null;
-      }
+    const descendant = sibling.startSpan({ name: "descendant" });
+    descendant.log({ input: "descendant data" });
+    const descendantSpanId = descendant.spanId;
+    descendant.end();
 
-      if (event.scores?.accuracy !== undefined && event.scores.accuracy < 0.5) {
-        if (spanId) {
-          filteredSpanIds.add(spanId);
-        }
-        return null;
-      }
-      return event;
-    };
+    sibling.end();
+    parent.end();
+    root.end();
 
-    setFilteringFunction(filteringFunction);
+    await memoryLogger.flush();
+    const events = await memoryLogger.drain();
 
-    const experiment = _exportsForTestingOnly.initTestExperiment({
-      projectName: "test",
-      experimentName: "test-exp",
-    });
+    const loggedNames = events.map((e: any) => e.span_attributes?.name);
+    expect(loggedNames).toContain("root");
+    expect(loggedNames).toContain("parent");
+    expect(loggedNames).toContain("child");
+    expect(loggedNames).not.toContain("sibling"); // Filtered out
+    expect(loggedNames).toContain("descendant"); // Still logged!
 
-    experiment.log({
-      input: "bad input",
-      output: "bad output",
-      scores: { accuracy: 0.3 },
-    });
+    const descendantLog = events.find(
+      (e: any) => e.span_attributes?.name === "descendant",
+    );
 
-    experiment.log({
-      input: "good input",
-      output: "good output",
-      scores: { accuracy: 0.8 },
-    });
+    // Key assertion: descendant's span_parents still points to the FILTERED sibling
+    // This means the UI will see it as an orphan (parent doesn't exist)
+    expect(descendantLog.span_parents).toContain(siblingSpanId);
 
-    await memoryLogger.flush();
-    const events = await memoryLogger.drain();
+    // The descendant's span_parents does NOT contain the span id of "parent":
+    // it still lists the filtered "sibling" as its parent, so it is not reparented
+    expect(descendantLog.span_parents).not.toContain(parentSpanId);
 
-    expect(events).toHaveLength(1);
-    expect(events[0].input).toBe("good input");
-    expect(events[0].scores.accuracy).toBe(0.8);
+    // All spans share the same root_span_id
+    expect(descendantLog.root_span_id).toBe(rootSpanId);
   });
 
-  test("filter nested spans by name", async () => {
+  /**
+   * Test that users can implement cascading filter to avoid orphans.
+   *
+   * To filter "sibling" AND its descendants (avoiding orphans), users must
+   * track filtered span_ids and check span_parents in their filter function.
+   */
+  test("filtering with cascade to fix orphans", async () => {
     const filteredSpanIds = new Set<string>();
 
-    const filteringFunction = (event: any): any | null => {
+    const filterSiblingWithCascade = (event: any): any | null => {
       const spanId = event.span_id;
-      if (spanId && filteredSpanIds.has(spanId)) {
-        return null;
+      const spanParents = event.span_parents || [];
+
+      // Check if any parent was filtered (cascade)
+      for (const parentId of spanParents) {
+        if (filteredSpanIds.has(parentId)) {
+          if (spanId) {
+            filteredSpanIds.add(spanId);
+          }
+          return null;
+        }
       }
 
-      if (event.span_attributes?.name === "internal_helper") {
+      // Check if this span should be filtered
+      const spanName = event.span_attributes?.name ?? "";
+      if (spanName === "sibling") {
         if (spanId) {
           filteredSpanIds.add(spanId);
         }
@@ -1326,31 +1349,39 @@ describe("filtering functionality", () => {
       return event;
     };
 
-    setFilteringFunction(filteringFunction);
+    setFilteringFunction(filterSiblingWithCascade);
 
     const logger = initLogger({
       projectName: "test",
       projectId: "test-project-id",
     });
-    const parent = logger.startSpan({ name: "parent_span" });
-    parent.log({ input: "parent input", output: "parent output" });
 
-    const internalChild = parent.startSpan({ name: "internal_helper" });
-    internalChild.log({ input: "hidden input", output: "hidden output" });
-    internalChild.end();
+    const root = logger.startSpan({ name: "root" });
+
+    const parent = root.startSpan({ name: "parent" });
+
+    const child = parent.startSpan({ name: "child" });
+    child.log({ input: "child data" });
+    child.end();
+
+    const sibling = parent.startSpan({ name: "sibling" });
 
-    const publicChild = parent.startSpan({ name: "public_helper" });
-    publicChild.log({ input: "public input", output: "public output" });
-    publicChild.end();
+    const descendant = sibling.startSpan({ name: "descendant" });
+    descendant.log({ input: "descendant data" });
+    descendant.end();
 
+    sibling.end();
     parent.end();
+    root.end();
 
     await memoryLogger.flush();
     const events = await memoryLogger.drain();
 
-    const inputEvents = events.filter((e: any) => e.input !== undefined);
-    expect(inputEvents).toHaveLength(2);
-    expect(inputEvents[0].input).toBe("parent input");
-    expect(inputEvents[1].input).toBe("public input");
+    const loggedNames = events.map((e: any) => e.span_attributes?.name);
+    expect(loggedNames).toContain("root");
+    expect(loggedNames).toContain("parent");
+    expect(loggedNames).toContain("child");
+    expect(loggedNames).not.toContain("sibling"); // Filtered
+    expect(loggedNames).not.toContain("descendant"); // Also filtered (cascaded)
   });
 });
diff --git a/py/src/braintrust/test_logger.py b/py/src/braintrust/test_logger.py
index 42f1e09d0..924181868 100644
--- a/py/src/braintrust/test_logger.py
+++ b/py/src/braintrust/test_logger.py
@@ -2187,221 +2187,158 @@ def broken_masking_function(data):
     braintrust.set_masking_function(None)
 
 
-def test_filtering_function_logger(with_memory_logger, with_simulate_login):
-    """Test that filtering function can filter and redact events in Logger."""
-
-    # Track span IDs that should be filtered
+def test_filtering_middle_span_creates_orphan_under_root(with_memory_logger, with_simulate_login):
+    """
+    Test that filtering a middle span creates an orphan, NOT a reparented child.
+
+    Given this tree:
+        root
+          parent
+            child
+            sibling  <-- FILTERED
+              descendant
+
+    The user might EXPECT descendant to be reparented:
+        root
+          parent
+            child
+            descendant  <-- moved up to parent
+
+    But ACTUAL behavior is descendant becomes orphan under root:
+        root
+          parent
+            child
+          descendant  <-- orphan (span_parents still points to filtered "sibling")
+
+    This is because:
+    1. descendant's span_parents still contains sibling's span_id
+    2. sibling doesn't exist in the logged data
+    3. UI treats spans with missing parents as orphans under root
+    """
     filtered_span_ids = set()
 
-    def filtering_function(event):
-        """
-        Filter out events with 'langsmith:hidden' tag.
-        Redact sensitive metadata fields.
-        """
-        # Skip events from filtered spans
+    def filter_sibling(event):
         span_id = event.get("span_id")
         if span_id and span_id in filtered_span_ids:
             return None
 
-        # Skip events with langsmith:hidden tag
-        # Tags can be in two places depending on where in the processing we are:
-        # 1. At root level before processing (event["tags"])
-        # 2. In metadata after processing (event["metadata"]["tags"])
-        root_tags = event.get("tags", []) or []
-        metadata_tags = event.get("metadata", {}).get("tags", []) or []
-        all_tags = list(root_tags) + list(metadata_tags)
-
-        if "langsmith:hidden" in all_tags:
-            # Track this span ID so we filter all its events
+        span_name = event.get("span_attributes", {}).get("name", "")
+        if span_name == "sibling":
             if span_id:
                 filtered_span_ids.add(span_id)
             return None
 
-        # Redact api_key from metadata
-        if "metadata" in event and "api_key" in event.get("metadata", {}):
-            event = event.copy()
-            event["metadata"] = event["metadata"].copy()
-            event["metadata"]["api_key"] = "***REDACTED***"
-
         return event
 
-    # Set filtering function globally
-    braintrust.set_filtering_function(filtering_function)
+    braintrust.set_filtering_function(filter_sibling)
 
-    # Create test logger
     test_logger = init_test_logger("test_project")
 
-    # Log event that should be filtered out
-    test_logger.log(
-        input="Hidden input",
-        output="Hidden output",
-        tags=["langsmith:hidden"],
-    )
-
-    # Log event that should be redacted
-    test_logger.log(
-        input="Normal input",
-        output="Normal output",
-        metadata={"api_key": "secret123", "user": "testuser"},
-    )
-
-    # Log normal event
-    test_logger.log(
-        input="Another input",
-        output="Another output",
-        tags=["normal"],
-    )
-
-    # Check the logged data
-    logs = with_memory_logger.pop()
-
-    # Should only have 2 logs (the first one was filtered out)
-    assert len(logs) == 2
-
-    # First log should have redacted api_key
-    log1 = logs[0]
-    assert log1["input"] == "Normal input"
-    assert log1["output"] == "Normal output"
-    assert log1["metadata"]["api_key"] == "***REDACTED***"
-    assert log1["metadata"]["user"] == "testuser"
-
-    # Second log should be unchanged
-    log2 = logs[1]
-    assert log2["input"] == "Another input"
-    assert log2["output"] == "Another output"
-    assert log2["tags"] == ["normal"]
-
-    # Clean up
-    braintrust.set_filtering_function(None)
-
-
-def test_filtering_function_experiment(with_memory_logger, with_simulate_login):
-    """Test that filtering function works with Experiment spans."""
-
-    # Track filtered span IDs so we can filter all events from that span
-    filtered_span_ids = set()
-
-    def filtering_function(event):
-        """Filter out spans with score below threshold."""
-        span_id = event.get("span_id")
-
-        # Skip events from already-filtered spans
-        if span_id and span_id in filtered_span_ids:
-            return None
-
-        # Skip spans with low scores
-        scores = event.get("scores", {})
-        if "accuracy" in scores and scores["accuracy"] < 0.5:
-            if span_id:
-                filtered_span_ids.add(span_id)
-            return None
-        return event
+    with test_logger.start_span(name="root") as root:
+        root_span_id = root.span_id
 
-    # Set filtering function globally
-    braintrust.set_filtering_function(filtering_function)
+        with root.start_span(name="parent") as parent:
+            parent_span_id = parent.span_id
 
-    # Create test experiment
-    experiment = init_test_exp("test_project", "test_experiment")
+            with parent.start_span(name="child") as child:
+                child.log(input="child data")
 
-    # Log event with low score (should be filtered out)
-    experiment.log(
-        input="bad input",
-        output="bad output",
-        scores={"accuracy": 0.3},
-    )
+            with parent.start_span(name="sibling") as sibling:
+                sibling_span_id = sibling.span_id  # This span will be filtered
 
-    # Log event with good score
-    experiment.log(
-        input="good input",
-        output="good output",
-        scores={"accuracy": 0.8},
-    )
+                with sibling.start_span(name="descendant") as descendant:
+                    descendant.log(input="descendant data")
+                    descendant_span_id = descendant.span_id
 
-    # Log event without accuracy score (but with a different score)
-    experiment.log(
-        input="neutral input",
-        output="neutral output",
-        scores={"quality": 0.9},
-    )
+    logs = with_memory_logger.pop()
 
-    experiment.flush()
+    logged_names = [l.get("span_attributes", {}).get("name") for l in logs]
+    assert "root" in logged_names
+    assert "parent" in logged_names
+    assert "child" in logged_names
+    assert "sibling" not in logged_names  # Filtered out
+    assert "descendant" in logged_names  # Still logged!
 
-    # Check the logged data
-    logs = with_memory_logger.pop()
+    descendant_log = next(l for l in logs if l.get("span_attributes", {}).get("name") == "descendant")
 
-    # Should only have 2 logs (the first one with low score was filtered out)
-    assert len(logs) == 2
+    # Key assertion: descendant's span_parents still points to the FILTERED sibling
+    # This means the UI will see it as an orphan (parent doesn't exist)
+    assert sibling_span_id in descendant_log["span_parents"]
 
-    log1 = logs[0]
-    assert log1["input"] == "good input"
-    assert log1["scores"]["accuracy"] == 0.8
+    # The descendant's span_parents does NOT contain the "parent" span id;
+    # it still references "sibling" as its parent (which was filtered)
+    assert parent_span_id not in descendant_log["span_parents"]
 
-    log2 = logs[1]
-    assert log2["input"] == "neutral input"
-    assert log2["scores"]["quality"] == 0.9
+    # All spans share the same root_span_id
+    assert descendant_log["root_span_id"] == root_span_id
 
-    # Clean up
     braintrust.set_filtering_function(None)
 
 
-def test_filtering_function_with_child_spans(with_memory_logger, with_simulate_login):
-    """Test that filtering function works with nested spans."""
-
-    # Track filtered span IDs
+def test_filtering_with_cascade_to_fix_orphans(with_memory_logger, with_simulate_login):
+    """
+    Test that users can implement cascading filter to avoid orphans.
+
+    To filter "sibling" AND its descendants (avoiding orphans), users must
+    track filtered span_ids and check span_parents in their filter function.
+
+    Given:
+        root
+          parent
+            child
+            sibling  <-- FILTERED (and cascade to descendants)
+              descendant
+
+    With cascading filter, result is:
+        root
+          parent
+            child
+        (descendant is also filtered, no orphans)
+    """
     filtered_span_ids = set()
 
-    def filtering_function(event):
-        """Filter out spans with specific names."""
+    def filter_sibling_with_cascade(event):
         span_id = event.get("span_id")
-
-        # Skip events from already-filtered spans
-        if span_id and span_id in filtered_span_ids:
-            return None
-
-        # Filter out spans named "internal_helper"
-        span_attrs = event.get("span_attributes", {})
-        if span_attrs.get("name") == "internal_helper":
+        span_parents = event.get("span_parents", []) or []
+
+        # Check if any parent was filtered (cascade)
+        for parent_id in span_parents:
+            if parent_id in filtered_span_ids:
+                if span_id:
+                    filtered_span_ids.add(span_id)
+                return None
+
+        # Check if this span should be filtered
+        span_name = event.get("span_attributes", {}).get("name", "")
+        if span_name == "sibling":
             if span_id:
                 filtered_span_ids.add(span_id)
             return None
 
         return event
 
-    # Set filtering function globally
-    braintrust.set_filtering_function(filtering_function)
+    braintrust.set_filtering_function(filter_sibling_with_cascade)
 
-    # Create test logger
     test_logger = init_test_logger("test_project")
 
-    # Create a parent span
-    with test_logger.start_span(name="parent_span") as parent:
-        # Log to parent (should be kept)
-        parent.log(input="parent input", output="result1")
-
-        # Create a child span that should be filtered out
-        with parent.start_span(name="internal_helper") as child:
-            child.log(input="hidden input", output="hidden output")
+    with test_logger.start_span(name="root") as root:
+        with root.start_span(name="parent") as parent:
+            with parent.start_span(name="child") as child:
+                child.log(input="child data")
 
-        # Create another child span that should be kept
-        with parent.start_span(name="public_helper") as child2:
-            child2.log(input="public input", output="result2")
+            with parent.start_span(name="sibling") as sibling:
+                with sibling.start_span(name="descendant") as descendant:
+                    descendant.log(input="descendant data")
 
-    # Check the logged data
     logs = with_memory_logger.pop()
 
-    # Should have events from parent_span and public_helper, but not internal_helper
-    # span.log() on existing spans creates merge events, so we get 2 events
-    assert len(logs) == 2
-
-    # First should be from parent_span
-    assert logs[0]["input"] == "parent input"
-    assert logs[0]["output"] == "result1"
+    logged_names = [l.get("span_attributes", {}).get("name") for l in logs]
+    assert "root" in logged_names
+    assert "parent" in logged_names
+    assert "child" in logged_names
+    assert "sibling" not in logged_names  # Filtered
+    assert "descendant" not in logged_names  # Also filtered (cascaded)
 
-    # Second should be from public_helper
-    assert logs[1]["input"] == "public input"
-    assert logs[1]["output"] == "result2"
-
-    # Clean up
     braintrust.set_filtering_function(None)
 
 
@@ -3227,7 +3164,7 @@ def test_extract_attachments_collects_and_replaces():
     event = {
         "input": {"file": attachment1},
         "output": {"file": attachment2},
-        "metadata": {"files": [attachment1, ext_attachment]}
+        "metadata": {"files": [attachment1, ext_attachment]},
     }
 
     attachments = []
@@ -3257,7 +3194,7 @@ def test_extract_attachments_preserves_identity():
     event = {
         "input": attachment,
         "output": attachment,  # Same instance
-        "metadata": {"file": attachment}  # Same instance again
+        "metadata": {"file": attachment},  # Same instance again
     }
 
     attachments = []
@@ -3296,10 +3233,7 @@ def test_multiple_attachments_upload_tracked(with_memory_logger, with_simulate_l
 
     logger = init_test_logger(__name__)
     span = logger.start_span(name="test_span")
-    span.log(
-        input={"file1": attachment1},
-        output={"file2": attachment2}
-    )
+    span.log(input={"file1": attachment1}, output={"file2": attachment2})
     span.end()
     logger.flush()
 
@@ -3329,9 +3263,7 @@ def test_same_attachment_logged_twice_tracked_twice(with_memory_logger, with_sim
 def test_external_attachment_upload_tracked(with_memory_logger, with_simulate_login):
     """Test that ExternalAttachment upload is also tracked."""
     ext_attachment = ExternalAttachment(
-        url="s3://bucket/key.pdf",
-        filename="external.pdf",
-        content_type="application/pdf"
+        url="s3://bucket/key.pdf", filename="external.pdf", content_type="application/pdf"
     )
 
     logger = init_test_logger(__name__)
@@ -3369,11 +3301,7 @@ def test_multiple_attachment_types_tracked(with_memory_logger, with_simulate_log
 
     logger = init_test_logger(__name__)
     span = logger.start_span(name="test_span")
-    span.log(
-        input=attachment,
-        output=json_attachment,
-        metadata={"file": ext_attachment}
-    )
+    span.log(input=attachment, output=json_attachment, metadata={"file": ext_attachment})
     span.end()
     logger.flush()
 

From cdee484941072597fa9a48879f584a34f3ed068b Mon Sep 17 00:00:00 2001
From: Olmo Maldonado <olmo@braintrust.dev>
Date: Fri, 30 Jan 2026 15:59:34 -0800
Subject: [PATCH 3/3] add root (filtered) > child + sibling > descendant test

---
 js/src/logger.test.ts            | 106 +++++++++++++++++++++++++++++++
 py/src/braintrust/test_logger.py | 106 +++++++++++++++++++++++++++++++
 2 files changed, 212 insertions(+)

diff --git a/js/src/logger.test.ts b/js/src/logger.test.ts
index c24cbbbeb..47e21c53e 100644
--- a/js/src/logger.test.ts
+++ b/js/src/logger.test.ts
@@ -1384,4 +1384,110 @@ describe("filtering functionality", () => {
     expect(loggedNames).not.toContain("sibling"); // Filtered
     expect(loggedNames).not.toContain("descendant"); // Also filtered (cascaded)
   });
+
+  /**
+   * Test what happens when the ROOT span is filtered.
+   *
+   * Given:
+   *     root  <-- FILTERED
+   *       parent
+   *         child
+   *         sibling
+   *           descendant
+   *
+   * Result: All spans except root are still logged, but parent becomes an orphan.
+   * The internal hierarchy (parent->child, parent->sibling, sibling->descendant) is preserved.
+   * UI will create synthetic root and put parent under it as orphan.
+   */
+  test("filtering root span - descendants preserve internal hierarchy but parent becomes orphan", async () => {
+    const filteredSpanIds = new Set<string>();
+
+    const filterRoot = (event: any): any | null => {
+      const spanId = event.span_id;
+      if (spanId && filteredSpanIds.has(spanId)) {
+        return null;
+      }
+
+      const spanName = event.span_attributes?.name ?? "";
+      if (spanName === "root") {
+        if (spanId) {
+          filteredSpanIds.add(spanId);
+        }
+        return null;
+      }
+
+      return event;
+    };
+
+    setFilteringFunction(filterRoot);
+
+    const logger = initLogger({
+      projectName: "test",
+      projectId: "test-project-id",
+    });
+
+    const root = logger.startSpan({ name: "root" });
+    const rootSpanId = root.spanId;
+    const rootRootSpanId = root.rootSpanId;
+
+    const parent = root.startSpan({ name: "parent" });
+    const parentSpanId = parent.spanId;
+
+    const child = parent.startSpan({ name: "child" });
+    child.log({ input: "child data" });
+    child.end();
+
+    const sibling = parent.startSpan({ name: "sibling" });
+    const siblingSpanId = sibling.spanId;
+
+    const descendant = sibling.startSpan({ name: "descendant" });
+    descendant.log({ input: "descendant data" });
+    descendant.end();
+
+    sibling.end();
+    parent.end();
+    root.end();
+
+    await memoryLogger.flush();
+    const events = await memoryLogger.drain();
+
+    // Verify root was filtered but all others are logged
+    const loggedNames = events.map((e: any) => e.span_attributes?.name);
+    expect(loggedNames).not.toContain("root"); // Filtered
+    expect(loggedNames).toContain("parent");
+    expect(loggedNames).toContain("child");
+    expect(loggedNames).toContain("sibling");
+    expect(loggedNames).toContain("descendant");
+
+    // Get each span's log
+    const parentLog = events.find(
+      (e: any) => e.span_attributes?.name === "parent",
+    );
+    const childLog = events.find(
+      (e: any) => e.span_attributes?.name === "child",
+    );
+    const siblingLog = events.find(
+      (e: any) => e.span_attributes?.name === "sibling",
+    );
+    const descendantLog = events.find(
+      (e: any) => e.span_attributes?.name === "descendant",
+    );
+
+    // All spans still have root_span_id pointing to the filtered root
+    expect(parentLog.root_span_id).toBe(rootRootSpanId);
+    expect(childLog.root_span_id).toBe(rootRootSpanId);
+    expect(siblingLog.root_span_id).toBe(rootRootSpanId);
+    expect(descendantLog.root_span_id).toBe(rootRootSpanId);
+
+    // Parent's span_parents points to filtered root (making it an orphan in UI)
+    expect(parentLog.span_parents).toContain(rootSpanId);
+
+    // But the internal hierarchy is preserved:
+    // child and sibling are children of parent
+    expect(childLog.span_parents).toContain(parentSpanId);
+    expect(siblingLog.span_parents).toContain(parentSpanId);
+
+    // descendant is child of sibling
+    expect(descendantLog.span_parents).toContain(siblingSpanId);
+  });
 });
diff --git a/py/src/braintrust/test_logger.py b/py/src/braintrust/test_logger.py
index 924181868..a1c361bbf 100644
--- a/py/src/braintrust/test_logger.py
+++ b/py/src/braintrust/test_logger.py
@@ -2342,6 +2342,112 @@ def filter_sibling_with_cascade(event):
     braintrust.set_filtering_function(None)
 
 
+def test_filtering_root_span_all_descendants_become_orphans(with_memory_logger, with_simulate_login):
+    """
+    Test what happens when the ROOT span is filtered.
+
+    Given this tree:
+        root  <-- FILTERED
+          parent
+            child
+            sibling
+              descendant
+
+    Question: Will descendants maintain their relative structure?
+        parent
+          child
+          sibling
+            descendant
+
+    Answer: YES. Only parent becomes an orphan, because:
+    1. All spans have root_span_id pointing to the filtered root
+    2. parent has span_parents pointing to root (which doesn't exist)
+    3. UI will create a synthetic root and put parent under it as orphan
+    4. child/sibling are children of parent (which exists), so they're NOT orphans
+    5. descendant is child of sibling (which exists), so it's NOT an orphan
+
+    So the structure will be:
+        [synthetic root]
+          parent  <-- orphan (its span_parents points to filtered root)
+            child
+            sibling
+              descendant
+
+    The internal hierarchy is preserved, but parent becomes an orphan.
+    """
+    filtered_span_ids = set()
+
+    def filter_root(event):
+        span_id = event.get("span_id")
+        if span_id and span_id in filtered_span_ids:
+            return None
+
+        span_name = event.get("span_attributes", {}).get("name", "")
+        if span_name == "root":
+            if span_id:
+                filtered_span_ids.add(span_id)
+            return None
+
+        return event
+
+    braintrust.set_filtering_function(filter_root)
+
+    test_logger = init_test_logger("test_project")
+
+    with test_logger.start_span(name="root") as root:
+        root_span_id = root.span_id
+        root_root_span_id = root.root_span_id
+
+        with root.start_span(name="parent") as parent:
+            parent_span_id = parent.span_id
+
+            with parent.start_span(name="child") as child:
+                child.log(input="child data")
+                child_span_id = child.span_id
+
+            with parent.start_span(name="sibling") as sibling:
+                sibling_span_id = sibling.span_id
+
+                with sibling.start_span(name="descendant") as descendant:
+                    descendant.log(input="descendant data")
+                    descendant_span_id = descendant.span_id
+
+    logs = with_memory_logger.pop()
+
+    # Verify root was filtered but all others are logged
+    logged_names = [l.get("span_attributes", {}).get("name") for l in logs]
+    assert "root" not in logged_names  # Filtered
+    assert "parent" in logged_names
+    assert "child" in logged_names
+    assert "sibling" in logged_names
+    assert "descendant" in logged_names
+
+    # Get each span's log
+    parent_log = next(l for l in logs if l.get("span_attributes", {}).get("name") == "parent")
+    child_log = next(l for l in logs if l.get("span_attributes", {}).get("name") == "child")
+    sibling_log = next(l for l in logs if l.get("span_attributes", {}).get("name") == "sibling")
+    descendant_log = next(l for l in logs if l.get("span_attributes", {}).get("name") == "descendant")
+
+    # All spans still have root_span_id pointing to the filtered root
+    assert parent_log["root_span_id"] == root_root_span_id
+    assert child_log["root_span_id"] == root_root_span_id
+    assert sibling_log["root_span_id"] == root_root_span_id
+    assert descendant_log["root_span_id"] == root_root_span_id
+
+    # Parent's span_parents points to filtered root (making it an orphan in UI)
+    assert root_span_id in parent_log["span_parents"]
+
+    # But the internal hierarchy is preserved:
+    # child and sibling are children of parent
+    assert parent_span_id in child_log["span_parents"]
+    assert parent_span_id in sibling_log["span_parents"]
+
+    # descendant is child of sibling
+    assert sibling_span_id in descendant_log["span_parents"]
+
+    braintrust.set_filtering_function(None)
+
+
 def test_attachment_unreadable_path_logs_warning(caplog):
     with caplog.at_level(logging.WARNING, logger="braintrust"):
         Attachment(