⚡️ Speed up method _Fanova._compute_variances by 980%
#172
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
📄 980% (9.80x) speedup for
`_Fanova._compute_variances` in `optuna/importance/_fanova/_fanova.py` ⏱️ Runtime:
18.5 milliseconds → 1.71 milliseconds (best of 144 runs) 📝 Explanation and details
The key optimization replaces
`np.clip(marginal_variance, 0.0, None)` with a simple conditional expression `marginal_variance if marginal_variance > 0.0 else 0.0`. This change eliminates the overhead of calling NumPy's general-purpose clipping function, which is designed to handle array inputs and more complex clipping scenarios. Since we're only dealing with scalar values and only need to clip negative values to zero (with no upper bound), the direct conditional is significantly faster.
The line profiler shows the critical bottleneck: the
`np.clip` call took 82.9% of the total runtime (40.8 ms out of 49.2 ms). The optimized version reduces this to just 24.5% of a much smaller total runtime (2.3 ms out of 9.4 ms), achieving a 980% speedup. The optimization is particularly effective for the test cases that process many trees with variance computations:
This optimization maintains identical behavior - both approaches ensure non-negative variances - but achieves it through a more efficient code path that avoids NumPy's function call overhead for simple scalar operations.
✅ Correctness verification report:
🌀 Generated Regression Tests and Runtime
import pytest
from optuna.importance._fanova._fanova import _Fanova
Dummy _FanovaTree for testing
class DummyFanovaTree:
    """Minimal stand-in for _FanovaTree used to isolate _Fanova in tests."""

    # Fix: the scraped source showed `def init` — markdown rendering stripped
    # the dunder underscores; the constructor must be `__init__`.
    def __init__(self, marginal_variances):
        # marginal_variances: dict mapping tuple of raw_features to float
        self.marginal_variances = marginal_variances
        self.call_args = []  # For tracking calls
from optuna.importance._fanova._fanova import _Fanova
------------------- UNIT TESTS -------------------
----------- BASIC TEST CASES -----------
#------------------------------------------------
import numpy as np
imports
import pytest # used for our unit tests
from optuna.importance._fanova._fanova import _Fanova
--- Minimal stubs for dependencies to allow isolated testing of _Fanova._compute_variances ---
Stub for _imports.check()
class _imports:
@staticmethod
def check():
pass
Minimal stub for _FanovaTree with a configurable get_marginal_variance method
class _FanovaTree:
def init(self, marginal_variances):
# marginal_variances: dict mapping tuple of features to float
self.marginal_variances = marginal_variances
--- Minimal stub for RandomForestRegressor to allow instantiation ---
class RandomForestRegressor:
    """No-op stub so _Fanova can be instantiated without scikit-learn."""

    # Fix: the scraped source showed `def init` — markdown rendering stripped
    # the dunder underscores; the constructor must be `__init__`.
    def __init__(self, **kwargs):
        # Accept and ignore any constructor arguments.
        pass
from optuna.importance._fanova._fanova import _Fanova
------------------------------ UNIT TESTS ------------------------------
Basic Test Cases
def test_basic_already_computed():
    """_compute_variances must be a no-op when the result is already cached."""
    fanova = _Fanova(1, 1, 2, 1, 42)
    fanova._trees = [_FanovaTree({(0,): 1.5})]
    fanova._variances = {0: np.array([99.0])}
    fanova._column_to_encoded_columns = [np.array([0])]
    fanova._compute_variances(0)
    # Fix: the original test made no assertion; verify the cached value
    # survived untouched (it differs from what recomputation would produce).
    assert fanova._variances[0][0] == 99.0
Edge Test Cases
def test_edge_negative_variance_clipped_to_zero():
    """Negative marginal variances must be clipped to zero."""
    fanova = _Fanova(2, 1, 2, 1, 42)
    fanova._trees = [
        _FanovaTree({(0,): -1.0}),
        _FanovaTree({(0,): -2.5}),
    ]
    fanova._variances = {}
    fanova._column_to_encoded_columns = [np.array([0])]
    fanova._compute_variances(0)
    # Fix: assert that clipping actually happened (original asserted nothing).
    assert np.all(fanova._variances[0] == 0.0)
def test_edge_empty_trees_list():
    """With no trees, an empty variances array should be stored."""
    fanova = _Fanova(0, 1, 2, 1, 42)
    fanova._trees = []
    fanova._variances = {}
    fanova._column_to_encoded_columns = [np.array([0])]
    fanova._compute_variances(0)
    # Fix: assert the stored array is empty (original asserted nothing).
    assert len(fanova._variances[0]) == 0
def test_edge_empty_column_to_encoded_columns():
    """Indexing an empty _column_to_encoded_columns must raise IndexError."""
    instance = _Fanova(1, 1, 2, 1, 42)
    instance._trees = [_FanovaTree({(0,): 1.0})]
    instance._variances = {}
    instance._column_to_encoded_columns = []
    with pytest.raises(IndexError):
        instance._compute_variances(0)
def test_edge_none_attributes():
    """Unset (None) internal state should trip the method's assertions."""
    instance = _Fanova(1, 1, 2, 1, 42)
    instance._trees = None
    instance._variances = None
    instance._column_to_encoded_columns = None
    with pytest.raises(AssertionError):
        instance._compute_variances(0)
def test_edge_feature_index_out_of_range():
    """A feature index beyond _column_to_encoded_columns must raise IndexError."""
    instance = _Fanova(1, 1, 2, 1, 42)
    instance._trees = [_FanovaTree({(0,): 1.0})]
    instance._variances = {}
    instance._column_to_encoded_columns = [np.array([0])]
    with pytest.raises(IndexError):
        instance._compute_variances(1)
def test_edge_non_integer_feature_index():
    """A non-integer feature index must raise TypeError."""
    instance = _Fanova(1, 1, 2, 1, 42)
    instance._trees = [_FanovaTree({(0,): 1.0})]
    instance._variances = {}
    instance._column_to_encoded_columns = [np.array([0])]
    with pytest.raises(TypeError):
        instance._compute_variances("not_an_int")
def test_edge_tree_returns_none():
    """A tree yielding None for its marginal variance must raise TypeError."""

    class BadFanovaTree(_FanovaTree):
        # Deliberately broken: returns None instead of a number, so the
        # variance-clipping step inside _compute_variances fails.
        def get_marginal_variance(self, raw_features):
            return None

    instance = _Fanova(1, 1, 2, 1, 42)
    instance._trees = [BadFanovaTree({})]
    instance._variances = {}
    instance._column_to_encoded_columns = [np.array([0])]
    with pytest.raises(TypeError):
        instance._compute_variances(0)
Large Scale Test Cases
def test_large_scale_random_variances():
    """500 trees x 5 features with random variances match manual clipping."""
    import random

    n_trees = 500
    n_features = 5
    random.seed(123)
    fanova = _Fanova(n_trees, 1, 2, 1, 42)
    # Each tree returns a random variance between -10 and 10 for each feature.
    trees = []
    variances_matrix = []
    for _ in range(n_trees):
        tree_vars = {}
        tree_row = []
        for i in range(n_features):
            v = random.uniform(-10, 10)
            tree_vars[tuple([i])] = v
            tree_row.append(v)
        trees.append(_FanovaTree(tree_vars))
        variances_matrix.append(tree_row)
    fanova._trees = trees
    fanova._variances = {}
    fanova._column_to_encoded_columns = [np.array([i]) for i in range(n_features)]
    # For each feature, compute expected clipped variances.
    for feature in range(n_features):
        fanova._compute_variances(feature)
        expected = [
            np.clip(variances_matrix[tree_idx][feature], 0.0, None)
            for tree_idx in range(n_trees)
        ]
        # Fix: `expected` was computed but never compared in the original.
        np.testing.assert_allclose(fanova._variances[feature], expected)
def test_large_scale_all_zero_variances():
    """1000 trees x 3 features with all-zero marginal variances stay zero."""
    n_trees = 1000
    n_features = 3
    fanova = _Fanova(n_trees, 1, 2, 1, 42)
    fanova._trees = [
        _FanovaTree({tuple([i]): 0.0 for i in range(n_features)})
        for _ in range(n_trees)
    ]
    fanova._variances = {}
    fanova._column_to_encoded_columns = [np.array([i]) for i in range(n_features)]
    for feature in range(n_features):
        fanova._compute_variances(feature)
        # Fix: assert the stored variances are all zero (original asserted
        # nothing after the call).
        assert np.all(fanova._variances[feature] == 0.0)
def test_large_scale_some_negative_variances():
    """Alternating -5/+5 variances: negatives clip to 0, positives pass through."""
    n_trees = 1000
    n_features = 2
    fanova = _Fanova(n_trees, 1, 2, 1, 42)
    trees = []
    for idx in range(n_trees):
        tree_vars = {}
        for i in range(n_features):
            tree_vars[tuple([i])] = -5.0 if idx % 2 == 0 else 5.0
        trees.append(_FanovaTree(tree_vars))
    fanova._trees = trees
    fanova._variances = {}
    fanova._column_to_encoded_columns = [np.array([i]) for i in range(n_features)]
    for feature in range(n_features):
        fanova._compute_variances(feature)
        expected = [0.0 if idx % 2 == 0 else 5.0 for idx in range(n_trees)]
        # Fix: `expected` was computed but never compared in the original.
        np.testing.assert_allclose(fanova._variances[feature], expected)
codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
from optuna.importance._fanova._fanova import _Fanova
import pytest
def test__Fanova__compute_variances():
    """Calling _compute_variances on a bare instance must hit its assertions."""
    bare_instance = _Fanova(0, 0, 0, 0, 0)
    with pytest.raises(AssertionError):
        _Fanova._compute_variances(bare_instance, 0)
🔎 Concolic Coverage Tests and Runtime
codeflash_concolic_gbwq510t/tmpbpcq8syo/test_concolic_coverage.py::test__Fanova__compute_variances — To edit these changes, run
`git checkout codeflash/optimize-_Fanova._compute_variances-mhtsbonn` and push.