Patch summary (free text ahead of the first "diff --git" header).

Files touched:
  * py/src/braintrust/trace.py
      In get_spans(), the unfiltered branch (no span_type given) used to set
      self._all_fetched = True unconditionally after awaiting
      self._fetch_spans(None). It now sets the flag only when
      self._span_cache is truthy.
  * py/src/braintrust/test_trace.py
      test_handle_empty_results is rewritten: fetch_fn returns [] on its
      first call and one span afterwards, and the test asserts that a second
      unfiltered get_spans() call invokes fetch_fn again and returns that span.
      Two tests are added: test_empty_then_populated_refetches (same scenario
      with two spans) and test_empty_results_with_type_filter (same scenario
      using span_type=["llm"] on both calls).

NOTE(review): the new guard inspects the whole _span_cache rather than the
result of this particular fetch. If the cache can already hold spans from an
earlier call, an empty unfiltered fetch would still set _all_fetched.
Confirm that this is intended.
NOTE(review): test_empty_results_with_type_filter exercises the filtered path,
which this patch does not modify. Presumably the existing filtered-path logic
already re-fetches a type that returned no spans; verify against trace.py.

diff --git a/py/src/braintrust/test_trace.py b/py/src/braintrust/test_trace.py
index c1bfeb9..7b309ea 100644
--- a/py/src/braintrust/test_trace.py
+++ b/py/src/braintrust/test_trace.py
@@ -234,19 +234,75 @@ async def fetch_fn(span_type):
 
     @pytest.mark.asyncio
     async def test_handle_empty_results(self):
-        """Test handling empty results."""
+        """Test that empty results don't permanently cache, allowing re-fetch when data becomes available."""
+        call_count = 0
+        mock_spans = [make_span("span-1", "llm")]
 
         async def fetch_fn(span_type):
-            return []
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                return []
+            return mock_spans
 
         fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)
 
+        # First call returns empty
         result = await fetcher.get_spans()
         assert len(result) == 0
+        assert call_count == 1
 
-        # Should still mark as fetched
-        await fetcher.get_spans(span_type=["llm"])
-        # No additional assertions, just making sure it doesn't crash
+        # Second call should re-fetch since first was empty
+        result = await fetcher.get_spans()
+        assert call_count == 2
+        assert len(result) == 1
+        assert result[0].span_id == "span-1"
+
+    @pytest.mark.asyncio
+    async def test_empty_then_populated_refetches(self):
+        """Test that fetch_fn returning [] first, then spans on second call, works correctly."""
+        call_count = 0
+        spans = [make_span("span-1", "llm"), make_span("span-2", "function")]
+
+        async def fetch_fn(span_type):
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                return []
+            return spans
+
+        fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)
+
+        result1 = await fetcher.get_spans()
+        assert len(result1) == 0
+
+        result2 = await fetcher.get_spans()
+        assert call_count == 2
+        assert len(result2) == 2
+        assert {s.span_id for s in result2} == {"span-1", "span-2"}
+
+    @pytest.mark.asyncio
+    async def test_empty_results_with_type_filter(self):
+        """Test that type-filtered fetches handle empty results correctly."""
+        call_count = 0
+
+        async def fetch_fn(span_type):
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                return []
+            return [make_span("span-1", "llm")]
+
+        fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)
+
+        # First call with type filter returns empty
+        result1 = await fetcher.get_spans(span_type=["llm"])
+        assert len(result1) == 0
+
+        # Second call with same type should re-fetch since type wasn't cached with results
+        result2 = await fetcher.get_spans(span_type=["llm"])
+        assert call_count == 2
+        assert len(result2) == 1
 
     @pytest.mark.asyncio
     async def test_handle_empty_span_type_array(self):
diff --git a/py/src/braintrust/trace.py b/py/src/braintrust/trace.py
index ef7044e..52a5786 100644
--- a/py/src/braintrust/trace.py
+++ b/py/src/braintrust/trace.py
@@ -210,7 +210,8 @@ async def get_spans(self, span_type: Optional[list[str]] = None) -> list[SpanDat
         # If no filter requested, fetch everything
         if not span_type or len(span_type) == 0:
             await self._fetch_spans(None)
-            self._all_fetched = True
+            if self._span_cache:  # Only cache if we got results
+                self._all_fetched = True
             return self._get_from_cache(None)
 
         # Find which spanTypes we don't have in cache yet