@@ -53,23 +53,32 @@ function maybe_sleep(x)
5353end
5454
"""
    benchmark_fun!(f!, summarystat, C, A, B, sleep_time,
                   force_belapsed = false, reference = nothing)

Time `f!(C, A, B)` and return `summarystat` of the observed run times, in seconds.

# Arguments
- `f!`: the function being timed; it is called as `f!(C, A, B)`.
- `summarystat`: function used to reduce the timings to one number. It is applied either
  to a `BenchmarkTools` trial (fast path) or to a tuple of `@elapsed` times (slow path).
- `C`, `A`, `B`: arguments forwarded to `f!`.
- `sleep_time`: forwarded to `maybe_sleep` before each timed run.
- `force_belapsed`: if `true`, always take the `@benchmark` path.
- `reference`: if not `nothing`, `C ≈ reference` is asserted after the first run.

# Returns
The summarized run time in seconds.
"""
function benchmark_fun!(
    f!::F, summarystat, C, A, B, sleep_time, force_belapsed = false, reference = nothing
) where {F}
    maybe_sleep(sleep_time)
    t0 = @elapsed f!(C, A, B)
    isnothing(reference) || @assert C ≈ reference
    if force_belapsed || 2 * t0 < BenchmarkTools.DEFAULT_PARAMETERS.seconds
        maybe_sleep(sleep_time)
        br = @benchmark $f!($C, $A, $B)
        # BenchmarkTools estimates report `time` in nanoseconds, whereas `@elapsed` and the
        # slow path below use seconds; convert so both branches return the same unit.
        tret = 1e-9 * summarystat(br).time
        if summarystat === minimum # don't want to do this for `median` or `mean`, for example
            tret = min(tret, t0)
        end
    else
        # A single run is already long relative to the BenchmarkTools time budget, so
        # summarize a handful of individually timed runs instead.
        maybe_sleep(sleep_time)
        t1 = @elapsed f!(C, A, B)
        maybe_sleep(sleep_time)
        t2 = @elapsed f!(C, A, B)
        if (t0 + t1) < 4 * BenchmarkTools.DEFAULT_PARAMETERS.seconds
            maybe_sleep(sleep_time)
            t3 = @elapsed f!(C, A, B)
            tret = summarystat((t0, t1, t2, t3))
        else
            tret = summarystat((t0, t1, t2))
        end
    end
    return tret
end
# For the `adjoint` and `transpose` wrappers, return the two sizes in swapped order.
function _mat_size(M, N, ::typeof(adjoint))
    return (N, M)
end
function _mat_size(M, N, ::typeof(transpose))
    return (N, M)
end
@@ -79,7 +88,6 @@ function alloc_mat(_M, _N, memory::Vector{T}, off, f = identity) where {T}
7988 A = f (reshape (view (memory, (off+ 1 ): (off+ M* N)), (M, N)))
8089 A, off + align (M* N, T)
8190end
82-
# Normalize a size specification to a 3-tuple: a single integer is repeated three times,
# and a 3-tuple of integers is returned unchanged.
function matmul_sizes(s::Integer)
    return (s, s, s)
end
function matmul_sizes(mkn::Tuple{Vararg{Integer,3}})
    return mkn
end

# Product of the three sizes produced by `matmul_sizes`.
function matmul_length(s)
    dims = matmul_sizes(s)
    return prod(dims)
end
@@ -174,14 +182,35 @@ function default_libs(::Type{T}) where {T}
174182 end
175183end
176184
185+
186+
177187"""
178188 runbench(T = Float64;
179189 libs = default_libs(T),
180190 sizes = logspace(2, 4000, 200),
181191 threaded::Bool = Threads.nthreads() > 1,
182192 A_transform = identity,
183193 B_transform = identity,
184- sleep_time = 0.0)
194+ sleep_time = 0.0,
195+ summarystat = minimum)
196+
197+ - T: The element type of the matrices.
198+ - libs: Libraries to benchmark.
199+ - sizes: Sizes of matrices to benchmark. Must be an iterable with either
200+ `eltype(sizes) === Int` or `eltype(sizes) === NTuple{3,Int}`.
201+ If the former, the matrices are square, with each dimension equal to the value.
202+ If `i::NTuple{3,Int}`, it benchmarks `C = A * B` where `A` is `i[1]` by `i[2]`,
203+ `B` is `i[2]` by `i[3]` and `C` is `i[1]` by `i[3]`.
204+ - threaded: Should it benchmark multithreaded implementations?
205+ - A_transform: a function to apply to `A`. Defaults to `identity`, but can be `adjoint`.
206+ - B_transform: a function to apply to `B`. Defaults to `identity`, but can be `adjoint`.
207+ - sleep_time: The use of this keyword argument is discouraged. If set, it will call `sleep`
208+ in between benchmarks, the idea being to help keep the CPU cool. This is an unreliable
209+ means of trying to get more reliable benchmarks. Instead, it's recommended you disable
210+ your system's turbo. Disabling it -- and re-enabling when you're done benchmarking --
211+ should be possible without requiring a reboot.
212+ - summarystat: Which summary statistic should be reported? Defaults to `minimum`
213+
185214"""
186215function runbench (
187216 :: Type{T} = Float64;
@@ -190,7 +219,8 @@ function runbench(
190219 threaded:: Bool = Threads. nthreads () > 1 ,
191220 A_transform = identity,
192221 B_transform = identity,
193- sleep_time = 0.0
222+ sleep_time = 0.0 ,
223+ summarystat = minimum
194224) where {T}
195225 if threaded
196226 mkl_set_num_threads (num_cores ())
@@ -230,7 +260,7 @@ function runbench(
230260 for i ∈ eachindex (funcs)
231261 C, ref = i == 1 ? (C0, nothing ) : (fill! (C1,junk (T)), C0)
232262 t = benchmark_fun! (
233- funcs[i], C, A, B, sleep_time, force_belapsed, ref
263+ funcs[i], summarystat, C, A, B, sleep_time, force_belapsed, ref
234264 )
235265 gflops = 2e-9 M* K* N / t
236266 times[j,i] = t
0 commit comments