⚡️ Speed up function _get_tick_info by 6%
#170
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
📄 6% (0.06x) speedup for `_get_tick_info` in `optuna/visualization/_rank.py`
⏱️ Runtime: 2.75 milliseconds → 2.59 milliseconds (best of 250 runs)
📝 Explanation and details
The optimization eliminates a redundant sorting operation by removing the `np.sort(target_values)` call. The key insight is that `np.quantile()` internally sorts the input data when computing quantiles, making the explicit pre-sorting unnecessary.

What was optimized:
- Removed the line `sorted_target_values = np.sort(target_values)`.
- Changed `np.quantile(sorted_target_values, coloridxs)` to `np.quantile(target_values, coloridxs)`.

Why this improves performance:
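- One ordering pass over the data is removed (the remaining ordering is done inside `np.quantile`).
- The intermediate `sorted_target_values` array is no longer created.

Performance characteristics: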
The optimization is especially valuable since this function appears to be in a visualization pipeline where it may be called frequently on varying dataset sizes.
✅ Correctness verification report:
🌀 Generated Regression Tests and Runtime
from typing import List
# function to test
import numpy as np
# imports
import pytest # used for our unit tests
from optuna.visualization._rank import _get_tick_info
# Define the _TickInfo class as it's required for the function's output
class _TickInfo:
def init(self, coloridxs: List[float], text: List[str]):
self.coloridxs = coloridxs
self.text = text
from optuna.visualization._rank import _get_tick_info
# unit tests
# ----------------- Basic Test Cases -----------------
def test_basic_integers():
    """Run ``_get_tick_info`` on a small increasing integer array."""
    # Test with a simple increasing integer array
    arr = np.array([1, 2, 3, 4, 5])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 74.1μs -> 70.3μs (5.41% faster)
    # Quantiles for [1,2,3,4,5]: 0:1, 0.25:2, 0.5:3, 0.75:4, 1:5
    # NOTE(review): `expected` is built but never compared with `result` here.
    expected = _TickInfo(
        coloridxs=[0, 0.25, 0.5, 0.75, 1],
        text=["min. (1)", "25% (2)", "50% (3)", "75% (4)", "max. (5)"]
    )
def test_basic_floats():
    """Run ``_get_tick_info`` on a small float array."""
    # Test with a float array
    arr = np.array([0.0, 1.0, 2.0, 3.0, 4.0])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 75.4μs -> 69.3μs (8.82% faster)
    # NOTE(review): `expected` is built but never compared with `result` here.
    expected = _TickInfo(
        coloridxs=[0, 0.25, 0.5, 0.75, 1],
        text=["min. (0)", "25% (1)", "50% (2)", "75% (3)", "max. (4)"]
    )
def test_basic_unsorted():
    """Run ``_get_tick_info`` on an unsorted integer array."""
    # Test with an unsorted array
    arr = np.array([5, 1, 3, 2, 4])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 73.5μs -> 69.3μs (6.14% faster)
    # NOTE(review): `expected` is built but never compared with `result` here.
    expected = _TickInfo(
        coloridxs=[0, 0.25, 0.5, 0.75, 1],
        text=["min. (1)", "25% (2)", "50% (3)", "75% (4)", "max. (5)"]
    )
def test_basic_duplicates():
    """Run ``_get_tick_info`` on an array containing a duplicated value."""
    # Test with duplicate values
    arr = np.array([1, 2, 2, 3, 4])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 72.3μs -> 67.2μs (7.50% faster)
    # Quantiles: 0:1, 0.25:2, 0.5:2, 0.75:3, 1:4
    # NOTE(review): `expected` is built but never compared with `result` here.
    expected = _TickInfo(
        coloridxs=[0, 0.25, 0.5, 0.75, 1],
        text=["min. (1)", "25% (2)", "50% (2)", "75% (3)", "max. (4)"]
    )
# ----------------- Edge Test Cases -----------------
def test_edge_all_same_value():
    """Run ``_get_tick_info`` on an array whose elements are all equal."""
    # All elements are the same
    arr = np.array([7, 7, 7, 7, 7])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 72.5μs -> 68.2μs (6.39% faster)
    # NOTE(review): `expected` is built but never compared with `result` here.
    expected = _TickInfo(
        coloridxs=[0, 0.25, 0.5, 0.75, 1],
        text=["min. (7)", "25% (7)", "50% (7)", "75% (7)", "max. (7)"]
    )
def test_edge_two_values():
    """Run ``_get_tick_info`` on a two-element array."""
    # Only two values
    arr = np.array([1, 9])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 72.4μs -> 67.8μs (6.82% faster)
    # Quantiles: 0:1, 0.25:3, 0.5:5, 0.75:7, 1:9
    # NOTE(review): `expected` is built but never compared with `result` here.
    expected = _TickInfo(
        coloridxs=[0, 0.25, 0.5, 0.75, 1],
        text=["min. (1)", "25% (3)", "50% (5)", "75% (7)", "max. (9)"]
    )
def test_edge_single_value():
    """Run ``_get_tick_info`` on a single-element array."""
    # Single element array
    arr = np.array([42])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 71.2μs -> 66.2μs (7.69% faster)
    # NOTE(review): `expected` is built but never compared with `result` here.
    expected = _TickInfo(
        coloridxs=[0, 0.25, 0.5, 0.75, 1],
        text=["min. (42)", "25% (42)", "50% (42)", "75% (42)", "max. (42)"]
    )
def test_edge_empty_array():
    """Check that ``_get_tick_info`` raises IndexError for an empty array."""
    # Empty array should raise an IndexError (the exception this test expects)
    arr = np.array([])
    with pytest.raises(IndexError):
        _get_tick_info(arr) # 53.3μs -> 50.1μs (6.40% faster)
def test_edge_negative_values():
    """Run ``_get_tick_info`` on an array that includes negative values."""
    # Array with negative values
    arr = np.array([-10, -5, 0, 5, 10])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 77.7μs -> 74.6μs (4.22% faster)
    # NOTE(review): `expected` is built but never compared with `result` here.
    expected = _TickInfo(
        coloridxs=[0, 0.25, 0.5, 0.75, 1],
        text=["min. (-10)", "25% (-5)", "50% (0)", "75% (5)", "max. (10)"]
    )
def test_edge_nan_values():
    """Run ``_get_tick_info`` on an array containing one NaN."""
    # Array with NaN values should propagate NaN in quantiles
    arr = np.array([1, 2, np.nan, 4, 5])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 79.8μs -> 72.2μs (10.6% faster)
    # All quantiles will be nan if any nan is present
    # NOTE(review): the loop body is empty; nothing is checked per label here.
    for t in result.text:
        pass
def test_edge_inf_values():
    """Run ``_get_tick_info`` on an array containing ``inf`` and ``-inf``."""
    # Array with inf and -inf
    arr = np.array([1, 2, np.inf, -np.inf, 5])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 86.4μs -> 80.1μs (7.90% faster)
    # Quantiles: -inf, 1, 2, 5, inf
    # NOTE(review): `expected` is built but never compared with `result` here.
    expected = _TickInfo(
        coloridxs=[0, 0.25, 0.5, 0.75, 1],
        text=["min. (-inf)", "25% (1)", "50% (2)", "75% (5)", "max. (inf)"]
    )
def test_edge_non_integer_quantiles():
    """Run ``_get_tick_info`` on an array whose inner quantiles are not integers."""
    # Array where quantiles are not integers
    arr = np.array([0, 1, 2, 3])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 78.3μs -> 73.0μs (7.14% faster)
    # 0.25 quantile: 0.75, 0.5:1.5, 0.75:2.25
    expected = _TickInfo(
        coloridxs=[0, 0.25, 0.5, 0.75, 1],
        text=[
            "min. (0)",
            "25% (0.75)",
            "50% (1.5)",
            "75% (2.25)",
            "max. (3)"
        ]
    )
    # Allow for floating point formatting
    # NOTE(review): both values are parsed but never compared in this listing.
    for i, t in enumerate(expected.text):
        # Compare numbers as floats
        val_expected = float(t.split('(')[1][:-1])
        val_result = float(result.text[i].split('(')[1][:-1])
# ----------------- Large Scale Test Cases -----------------
def test_large_scale_monotonic():
    """Run ``_get_tick_info`` on ``np.arange(1000)``."""
    # Large monotonic increasing array
    arr = np.arange(1000)
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 86.5μs -> 76.1μs (13.7% faster)
    # Quantiles: 0:0, 0.25:249.75, 0.5:499.5, 0.75:749.25, 1:999
    expected_values = np.quantile(arr, [0, 0.25, 0.5, 0.75, 1])
    # NOTE(review): `val` is parsed but never compared with `expected_values`.
    for i, label in enumerate(["min.", "25%", "50%", "75%", "max."]):
        val = float(result.text[i].split('(')[1][:-1])
def test_large_scale_random():
    """Run ``_get_tick_info`` on 1000 seeded normally distributed samples."""
    # Large random array
    rng = np.random.default_rng(42)
    arr = rng.normal(loc=0, scale=1, size=1000)
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 90.0μs -> 90.9μs (0.973% slower)
    expected_values = np.quantile(arr, [0, 0.25, 0.5, 0.75, 1])
    # NOTE(review): `val` is parsed but never compared with `expected_values`.
    for i, label in enumerate(["min.", "25%", "50%", "75%", "max."]):
        val = float(result.text[i].split('(')[1][:-1])
def test_large_scale_all_same():
    """Run ``_get_tick_info`` on 1000 copies of the same float."""
    # Large array, all values the same
    arr = np.full(1000, 3.14)
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 77.9μs -> 73.2μs (6.32% faster)
    # NOTE(review): the loop body is empty; nothing is checked per label here.
    for t in result.text:
        pass
def test_large_scale_with_nan():
    """Run ``_get_tick_info`` on a 1000-element float array with one NaN."""
    # Large array with a single nan should propagate nan
    arr = np.arange(1000, dtype=float)
    arr[123] = np.nan
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 85.9μs -> 87.9μs (2.26% slower)
    # NOTE(review): the loop body is empty; nothing is checked per label here.
    for t in result.text:
        pass
def test_large_scale_with_inf():
    """Run ``_get_tick_info`` on a 1000-element array bounded by ``-inf``/``inf``."""
    # Large array with inf and -inf
    arr = np.arange(1000, dtype=float)
    arr[0] = -np.inf
    arr[-1] = np.inf
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 97.1μs -> 84.8μs (14.5% faster)
    # The others should be finite
    # NOTE(review): `val` is extracted but never checked in this listing.
    for t in result.text[1:4]:
        val = t.split('(')[1][:-1]
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
from dataclasses import dataclass
import numpy as np
# imports
import pytest # used for our unit tests
from optuna.visualization._rank import _get_tick_info
# function to test
@DataClass
class _TickInfo:
coloridxs: list
text: list
from optuna.visualization._rank import _get_tick_info
# unit tests
# ------------------- BASIC TEST CASES -------------------
def test_basic_integers():
    """Run ``_get_tick_info`` on consecutive integers."""
    # Basic test with consecutive integers
    arr = np.array([1, 2, 3, 4, 5])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 87.4μs -> 83.4μs (4.80% faster)
def test_basic_floats():
    """Run ``_get_tick_info`` on a small float array."""
    # Basic test with floats
    arr = np.array([0.0, 2.0, 4.0, 6.0, 8.0])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 78.0μs -> 72.0μs (8.41% faster)
def test_basic_unsorted():
    """Run ``_get_tick_info`` on an unsorted integer array."""
    # Test with unsorted input
    arr = np.array([5, 1, 4, 2, 3])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 74.9μs -> 71.5μs (4.74% faster)
def test_basic_negative_values():
    """Run ``_get_tick_info`` on an array that includes negative values."""
    # Test with negative values
    arr = np.array([-5, -3, -1, 1, 3])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 73.6μs -> 67.1μs (9.68% faster)
def test_basic_mixed_signs():
    """Run ``_get_tick_info`` on an array mixing negative, zero and positive values."""
    # Test with mix of positive and negative values
    arr = np.array([-2, 0, 2, 4, 6])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 74.9μs -> 68.8μs (8.92% faster)
# ------------------- EDGE TEST CASES -------------------
def test_edge_single_element():
    """Run ``_get_tick_info`` on a single-element array."""
    # Test with a single element
    arr = np.array([42])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 71.9μs -> 68.4μs (5.03% faster)
def test_edge_two_elements():
    """Run ``_get_tick_info`` on a two-element array."""
    # Test with two elements
    arr = np.array([10, 20])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 72.1μs -> 68.8μs (4.75% faster)
def test_edge_all_same_value():
    """Run ``_get_tick_info`` on an array whose elements are all equal."""
    # Test with all elements the same
    arr = np.array([7, 7, 7, 7, 7])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 72.7μs -> 67.7μs (7.39% faster)
def test_edge_empty_array():
    """Check that ``_get_tick_info`` raises IndexError for an empty array."""
    # Test with empty array
    arr = np.array([])
    # Should raise an IndexError (the exception this test expects)
    with pytest.raises(IndexError):
        _get_tick_info(arr) # 53.6μs -> 50.4μs (6.41% faster)
def test_edge_nan_values():
    """Run ``_get_tick_info`` on an array made only of NaN values."""
    # Test with NaN values
    arr = np.array([np.nan, np.nan, np.nan])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 82.2μs -> 76.0μs (8.18% faster)
    # All quantiles should be nan
    # NOTE(review): the loop body is empty; nothing is checked per label here.
    for txt in result.text:
        pass
def test_edge_inf_values():
    """Run ``_get_tick_info`` on ``[-inf, 0, inf]``."""
    # Test with inf and -inf values
    arr = np.array([-np.inf, 0, np.inf])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 90.9μs -> 86.2μs (5.46% faster)
def test_edge_large_and_small_values():
    """Run ``_get_tick_info`` on values spanning many orders of magnitude."""
    # Test with very large and very small values
    arr = np.array([1e-10, 1e10, 1e5, -1e10, -1e5])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 80.3μs -> 74.9μs (7.24% faster)
def test_edge_non_integer_quantiles():
    """Run ``_get_tick_info`` on ``[1, 2, 4, 8]``."""
    # Test with values that produce non-integer quantiles
    arr = np.array([1, 2, 4, 8])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 75.7μs -> 73.0μs (3.69% faster)
# ------------------- LARGE SCALE TEST CASES -------------------
def test_large_scale_uniform():
    """Run ``_get_tick_info`` on ``np.arange(1000)``."""
    # Large array of uniform values
    arr = np.arange(1000)
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 87.2μs -> 77.4μs (12.7% faster)
    # Quantiles should be: [0, 249.75, 499.5, 749.25, 999]
    # We'll check that the quantiles are close to expected
    expected = [0, 249.75, 499.5, 749.25, 999]
    # NOTE(review): `value_str` is extracted but never compared with `expected`.
    for i, txt in enumerate(result.text):
        value_str = txt.split('(')[-1].rstrip(')')
def test_large_scale_random():
    """Run ``_get_tick_info`` on 1000 seeded normally distributed samples."""
    # Large array of random values
    np.random.seed(0)
    arr = np.random.normal(loc=50, scale=10, size=1000)
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 89.0μs -> 93.7μs (5.03% slower)
    # Check that quantiles are sorted and within the expected range
    # NOTE(review): `quantiles` is parsed but no check follows in this listing.
    quantiles = [float(txt.split('(')[-1].rstrip(')')) for txt in result.text]
def test_large_scale_repeated_values():
    """Run ``_get_tick_info`` on 1000 copies of the same integer."""
    # Large array with repeated values
    arr = np.array([5] * 1000)
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 81.4μs -> 76.0μs (7.15% faster)
def test_large_scale_reverse_sorted():
    """Run ``_get_tick_info`` on 999..0 in descending order."""
    # Large array in reverse order
    arr = np.arange(999, -1, -1)
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 85.9μs -> 91.3μs (5.89% slower)
    expected = [0, 249.75, 499.5, 749.25, 999]
    # NOTE(review): `value_str` is extracted but never compared with `expected`.
    for i, txt in enumerate(result.text):
        value_str = txt.split('(')[-1].rstrip(')')
def test_large_scale_with_outliers():
    """Run ``_get_tick_info`` on 0..997 plus two extreme outliers."""
    # Large array with a few extreme outliers
    arr = np.concatenate([np.arange(998), [1e6, -1e6]])
    codeflash_output = _get_tick_info(arr); result = codeflash_output # 90.2μs -> 79.8μs (13.1% faster)
    # NOTE(review): `quantiles` is parsed but no check follows in this listing.
    quantiles = [float(txt.split('(')[-1].rstrip(')')) for txt in result.text]
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
To edit these changes, run `git checkout codeflash/optimize-_get_tick_info-mhtrvyh9` and push.