3
3
"""
    tmul_threads!(C, A, B)

Overwrite `C` with the matrix product of `A` and `B` using the `@tullio` macro.

The index `k` appears only on the right-hand side of the expression, so the
product is accumulated over it: `C[m,n] = Σₖ A[m,k] * B[k,n]`. No `threads`
option is passed here, so the macro's default threading behaviour applies.
"""
function tmul_threads!(C, A, B)
    @tullio C[m,n] = A[m,k] * B[k,n]
end
6
-
7
6
"""
    tmul_no_threads!(C, A, B)

Overwrite `C` with the matrix product of `A` and `B` using the `@tullio` macro,
with the macro's `threads` option set to `false`.

The index `k` appears only on the right-hand side of the expression, so the
product is accumulated over it: `C[m,n] = Σₖ A[m,k] * B[k,n]`.
"""
function tmul_no_threads!(C, A, B)
    @tullio C[m,n] = A[m,k] * B[k,n] threads=false
end
9
"""
    lvmul_threads!(C, A, B)

Overwrite `C` with the matrix product of `A` and `B` using an explicit triple
loop wrapped in the `@avxt` macro, and return `C`.

Loop ranges come from `indices`, which is handed every array that shares the
given axis (e.g. `indices((A,B), (2,1))` pairs axis 2 of `A` with axis 1 of
`B`), so each range is derived from all arrays indexed along it.
"""
function lvmul_threads!(C, A, B)
    @avxt for n ∈ indices((C,B), 2), m ∈ indices((C,A), 1)
        # Accumulate in a local of C's element type, then store once per (m, n).
        Cmn = zero(eltype(C))
        for k ∈ indices((A,B), (2,1))
            Cmn += A[m,k] * B[k,n]
        end
        C[m,n] = Cmn
    end
    # Return the destination explicitly, as generic_matmul! does, instead of
    # the `nothing` produced by ending on the loop.
    return C
end
18
"""
    lvmul_no_threads!(C, A, B)

Overwrite `C` with the matrix product of `A` and `B` using an explicit triple
loop wrapped in the `@avx` macro, and return `C`.

Loop ranges come from `indices`, which is handed every array that shares the
given axis (e.g. `indices((A,B), (2,1))` pairs axis 2 of `A` with axis 1 of
`B`), so each range is derived from all arrays indexed along it.
"""
function lvmul_no_threads!(C, A, B)
    @avx for n ∈ indices((C,B), 2), m ∈ indices((C,A), 1)
        # Accumulate in a local of C's element type, then store once per (m, n).
        Cmn = zero(eltype(C))
        for k ∈ indices((A,B), (2,1))
            Cmn += A[m,k] * B[k,n]
        end
        C[m,n] = Cmn
    end
    # Return the destination explicitly, as generic_matmul! does, instead of
    # the `nothing` produced by ending on the loop.
    return C
end
10
27
11
28
function generic_matmul! (C, A, B)
12
29
istransposed (C) === ' N' || (generic_matmul! (untransposed (C), _transpose (B), _transpose (A)); return C)
@@ -17,6 +34,8 @@ function generic_matmul!(C, A, B)
17
34
LinearAlgebra. generic_matmatmul! (C, transA, transB, pA, pB)
18
35
end
19
36
37
+
38
+
20
39
function getfuncs (libs:: Vector{Symbol} , threaded:: Bool ):: Vector{Function}
21
40
map (libs) do i
22
41
if i === :MKL
@@ -31,6 +50,8 @@ function getfuncs(libs::Vector{Symbol}, threaded::Bool)::Vector{Function}
31
50
threaded ? tmul_threads! : tmul_no_threads!
32
51
elseif i === :Gaius
33
52
threaded ? Gaius. mul! : Gaius. mul_serial!
53
+ elseif i === :LoopVectorization
54
+ threaded ? lvmul_threads! : lvmul_no_threads!
34
55
elseif i === :generic || i === :Generic || i === :GENERIC
35
56
generic_matmul!
36
57
else
0 commit comments