diff --git a/python/cucim/tests/performance/clara/test_read_region_memory_usage.py b/python/cucim/tests/performance/clara/test_read_region_memory_usage.py index 9b17b1549..a9d57d9c7 100644 --- a/python/cucim/tests/performance/clara/test_read_region_memory_usage.py +++ b/python/cucim/tests/performance/clara/test_read_region_memory_usage.py @@ -36,20 +36,81 @@ def get_used_gpu_memory_mib(): img = open_image_cucim(testimg_tiff_stripe_4096x4096_256_jpeg) - mem_usage_history = [get_used_gpu_memory_mib()] + mem_usage_history = [ + get_used_gpu_memory_mib() + ] # Memory before loop (image loaded) - for i in range(10): - _ = img.read_region(device="cuda") + num_iterations = 10 + warmup_iterations = ( + 3 # Number of iterations to run before establishing a baseline + ) + + for i in range(num_iterations): + region_data = img.read_region(device="cuda") + # Explicitly delete the CuPy array + del region_data + # Force CuPy to free unused blocks from its memory pool + cp.get_default_memory_pool().free_all_blocks() mem_usage_history.append(get_used_gpu_memory_mib()) - print(mem_usage_history) + print(f"Full memory usage history (MiB): {mem_usage_history}") + + # mem_usage_history[0] is before any read_region calls + # mem_usage_history[k] is after the k-th iteration (read_region, del, + # free_all_blocks) + + # Baseline memory after warmup_iterations (e.g., after 3rd iteration) + # Ensure warmup_iterations is less than num_iterations + if warmup_iterations >= num_iterations: + pytest.fail( + "warmup_iterations must be less than num_iterations for this test " + "logic" + ) + + # Memory after the warmup period (e.g., after 3rd call, so index 3) + mem_after_warmup = mem_usage_history[warmup_iterations] + # Memory after all iterations (e.g., after 10th call, so index 10) + mem_at_end = mem_usage_history[num_iterations] - # The difference in memory usage should be less than 180MB. - # Note: Since we cannot measure GPU memory usage for a process, - # we use a rough number. 
- # (experimentally measured, assuming that each image load - # consumes around 50MB of GPU memory). - assert mem_usage_history[5] - mem_usage_history[9] < 180.0 + # Calculate the increase in memory after the warmup period + memory_increase_after_warmup = mem_at_end - mem_after_warmup + + print( + f"Memory after warmup ({warmup_iterations} iterations): " + f"{mem_after_warmup:.2f} MiB" + ) + print(f"Memory at end ({num_iterations} iterations): {mem_at_end:.2f} MiB") + print( + f"Memory increase after warmup: {memory_increase_after_warmup:.2f} MiB" + ) + + # The increase in memory after the warm-up phase and explicit freeing + # should be minimal, ideally close to zero for a perfectly clean operation. + # This threshold (leak_threshold_mib, e.g., 30.0 MiB) defines an acceptable + # upper bound for the *cumulative* memory increase observed over the + # (num_iterations - warmup_iterations) test iterations. + # It accounts for potential minor non-reclaimable memory that might + # accumulate due to factors like fragmentation, persistent driver/runtime + # overheads, or small, consistent allocation patterns within the tested + # function, even with explicit attempts to free memory. + # + # For instance, a 30.0 MiB threshold over 7 active test iterations + # (10 total iterations - 3 warmup iterations) allows for an average of + # roughly 4.3 MiB of such net memory growth per iteration during the + # measurement phase. + # This differs from a previous version of this test, which asserted + # `memory_at_iteration_5 - memory_at_iteration_9 < 180` (a 180MB bound). + # That subtracts the later reading from the earlier one, so sustained + # growth gave a negative difference and could never trip the threshold. + # If the `read_region` operation has a consistent memory leak (i.e., memory + # that is allocated and not freed properly on an ongoing basis), the + # `memory_increase_after_warmup` is expected to exceed this threshold. 
+ leak_threshold_mib = 30.0 + assert memory_increase_after_warmup < leak_threshold_mib, ( + f"Memory increase ({memory_increase_after_warmup:.2f} MiB) " + f"exceeded threshold ({leak_threshold_mib} MiB) " + f"over {num_iterations - warmup_iterations} iterations after warmup." + ) def test_read_region_cpu_memleak(testimg_tiff_stripe_4096x4096_256):