Skip to content

Commit e9b1e11

Browse files
committed
AArch64 : multiply
Signed-off-by: Paul Guyot <[email protected]>
1 parent 644df21 commit e9b1e11

File tree

4 files changed

+210
-10
lines changed

4 files changed

+210
-10
lines changed

libs/jit/src/jit_aarch64.erl

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1741,32 +1741,69 @@ or_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
17411741
State#state{stream = Stream1}.
17421742

17431743
add(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
1744-
I1 = jit_x86_64_asm_unimplemented:addq(Val, Reg),
1744+
I1 = jit_aarch64_asm:add(Reg, Reg, Val),
17451745
Stream1 = StreamModule:append(Stream0, I1),
17461746
State#state{stream = Stream1}.
17471747

17481748
sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
1749-
I1 = jit_x86_64_asm_unimplemented:subq(Val, Reg),
1749+
I1 = jit_aarch64_asm:sub(Reg, Reg, Val),
17501750
Stream1 = StreamModule:append(Stream0, I1),
17511751
State#state{stream = Stream1}.
17521752

17531753
mul(State, _Reg, 1) ->
17541754
State;
17551755
mul(State, Reg, 2) ->
17561756
shift_left(State, Reg, 1);
1757+
mul(#state{available_regs = [Temp | _]} = State, Reg, 3) ->
1758+
I1 = jit_aarch64_asm:lsl(Temp, Reg, 1),
1759+
I2 = jit_aarch64_asm:add(Reg, Temp, Reg),
1760+
Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
1761+
State#state{stream = Stream1};
17571762
mul(State, Reg, 4) ->
17581763
shift_left(State, Reg, 2);
1764+
mul(#state{available_regs = [Temp | _]} = State, Reg, 5) ->
1765+
I1 = jit_aarch64_asm:lsl(Temp, Reg, 2),
1766+
I2 = jit_aarch64_asm:add(Reg, Temp, Reg),
1767+
Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
1768+
State#state{stream = Stream1};
1769+
mul(State0, Reg, 6) ->
1770+
State1 = mul(State0, Reg, 3),
1771+
mul(State1, Reg, 2);
1772+
mul(#state{available_regs = [Temp | _]} = State, Reg, 7) ->
1773+
I1 = jit_aarch64_asm:lsl(Temp, Reg, 3),
1774+
I2 = jit_aarch64_asm:sub(Reg, Temp, Reg),
1775+
Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
1776+
State#state{stream = Stream1};
17591777
mul(State, Reg, 8) ->
17601778
shift_left(State, Reg, 3);
1779+
mul(#state{available_regs = [Temp | _]} = State, Reg, 9) ->
1780+
I1 = jit_aarch64_asm:lsl(Temp, Reg, 3),
1781+
I2 = jit_aarch64_asm:add(Reg, Temp, Reg),
1782+
Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
1783+
State#state{stream = Stream1};
1784+
mul(State0, Reg, 10) ->
1785+
State1 = mul(State0, Reg, 5),
1786+
mul(State1, Reg, 2);
1787+
mul(#state{available_regs = [Temp | _]} = State, Reg, 15) ->
1788+
I1 = jit_aarch64_asm:lsl(Temp, Reg, 4),
1789+
I2 = jit_aarch64_asm:sub(Reg, Temp, Reg),
1790+
Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
1791+
State#state{stream = Stream1};
17611792
mul(State, Reg, 16) ->
17621793
shift_left(State, Reg, 4);
17631794
mul(State, Reg, 32) ->
17641795
shift_left(State, Reg, 5);
17651796
mul(State, Reg, 64) ->
17661797
shift_left(State, Reg, 6);
1767-
mul(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
1768-
I1 = jit_x86_64_asm_unimplemented:imulq(Val, Reg),
1769-
Stream1 = StreamModule:append(Stream0, I1),
1798+
mul(
1799+
#state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
1800+
Reg,
1801+
Val
1802+
) ->
1803+
% general case: load the constant into a temporary register and use MUL
1804+
I1 = jit_aarch64_asm:mov(Temp, Val),
1805+
I2 = jit_aarch64_asm:mul(Reg, Reg, Temp),
1806+
Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
17701807
State#state{stream = Stream1}.
17711808

17721809
-spec decrement_reductions_and_maybe_schedule_next(state()) -> state().

libs/jit/src/jit_aarch64_asm.erl

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
-export([
2222
add/3,
2323
add/4,
24+
sub/3,
25+
sub/4,
26+
mul/3,
27+
madd/4,
2428
b/1,
2529
bcc/2,
2630
blr/1,
@@ -101,7 +105,9 @@ add(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =
101105
RnNum = reg_to_num(Rn),
102106
%% AArch64 ADD (immediate) encoding: 1001000100iiiiiiiiiiiinnnnnddddd
103107
%% 0x91000000 | Imm << 10 | Rn << 5 | Rd
104-
<<(16#91000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little>>.
108+
<<(16#91000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little>>;
109+
add(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) ->
110+
add(Rd, Rn, Rm, {lsl, 0}).
105111

106112
%% ADD (shifted register)
107113
%% ADD Rd, Rn, Rm, {lsl, #amount}
@@ -918,6 +924,28 @@ subs(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) ->
918924
%% AArch64 SUBS (register): 11101011000mmmmm000000nnnnnddddd
919925
<<(16#EB000000 bor (RmNum bsl 16) bor (RnNum bsl 5) bor RdNum):32/little>>.
920926

927+
-spec sub(aarch64_gpr_register(), aarch64_gpr_register(), integer() | aarch64_gpr_register()) ->
928+
binary().
929+
sub(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 4095 ->
930+
RdNum = reg_to_num(Rd),
931+
RnNum = reg_to_num(Rn),
932+
<<(16#D1000000 bor ((Imm band 16#FFF) bsl 10) bor (RnNum bsl 5) bor RdNum):32/little>>;
933+
sub(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) ->
934+
sub(Rd, Rn, Rm, {lsl, 0}).
935+
936+
-spec sub(aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register(), {lsl, 0..63}) ->
937+
binary().
938+
sub(Rd, Rn, Rm, {lsl, Amount}) when
939+
is_atom(Rd), is_atom(Rn), is_atom(Rm), is_integer(Amount), Amount >= 0, Amount =< 63
940+
->
941+
RdNum = reg_to_num(Rd),
942+
RnNum = reg_to_num(Rn),
943+
RmNum = reg_to_num(Rm),
944+
<<
945+
(16#CB000000 bor (RmNum bsl 16) bor ((Amount band 16#3F) bsl 10) bor (RnNum bsl 5) bor
946+
RdNum):32/little
947+
>>.
948+
921949
%% Emit an ADR (PC-relative address) instruction (AArch64 encoding)
922950
%% Dst is destination register atom, Offset is signed immediate (in bytes, -1MB..+1MB)
923951
-spec adr(aarch64_gpr_register(), integer()) -> binary().
@@ -927,3 +955,20 @@ adr(Dst, Imm) when is_atom(Dst), is_integer(Imm), Imm >= -1048576, Imm =< 104857
927955
ImmHi = Imm bsr 2,
928956
Word = (16#10000000) bor (ImmLo bsl 29) bor ((ImmHi band 16#7FFFF) bsl 5) bor DstNum,
929957
<<Word:32/little>>.
958+
959+
-spec mul(aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register()) -> binary().
960+
mul(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) ->
961+
madd(Rd, Rn, Rm, xzr).
962+
963+
-spec madd(
964+
aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register(), aarch64_gpr_register()
965+
) -> binary().
966+
madd(Rd, Rn, Rm, Ra) when is_atom(Rd), is_atom(Rn), is_atom(Rm), is_atom(Ra) ->
967+
RdNum = reg_to_num(Rd),
968+
RnNum = reg_to_num(Rn),
969+
RmNum = reg_to_num(Rm),
970+
RaNum = reg_to_num(Ra),
971+
<<
972+
(16#9B000000 bor (RmNum bsl 16) bor (RaNum bsl 10) bor (RnNum bsl 5) bor
973+
RdNum):32/little
974+
>>.

tests/libs/jit/jit_aarch64_asm_tests.erl

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,53 @@
2626

2727
add_test_() ->
2828
[
29-
?_assertEqual(<<16#9100e0e7:32/little>>, jit_aarch64_asm:add(r7, r7, 56)),
30-
?_assertEqual(<<16#91000000:32/little>>, jit_aarch64_asm:add(r0, r0, 0)),
31-
?_assertEqual(<<16#91000421:32/little>>, jit_aarch64_asm:add(r1, r1, 1)),
32-
?_assertEqual(<<16#8b031041:32/little>>, jit_aarch64_asm:add(r1, r2, r3, {lsl, 4}))
29+
?_assertEqual(
30+
asm(<<16#9100e0e7:32/little>>, "add x7, x7, #56"), jit_aarch64_asm:add(r7, r7, 56)
31+
),
32+
?_assertEqual(
33+
asm(<<16#91000000:32/little>>, "add x0, x0, #0"), jit_aarch64_asm:add(r0, r0, 0)
34+
),
35+
?_assertEqual(
36+
asm(<<16#91000421:32/little>>, "add x1, x1, #1"), jit_aarch64_asm:add(r1, r1, 1)
37+
),
38+
?_assertEqual(
39+
asm(<<16#8b031041:32/little>>, "add x1, x2, x3, lsl #4"),
40+
jit_aarch64_asm:add(r1, r2, r3, {lsl, 4})
41+
),
42+
?_assertEqual(
43+
asm(<<16#8b030041:32/little>>, "add x1, x2, x3"), jit_aarch64_asm:add(r1, r2, r3)
44+
)
45+
].
46+
47+
sub_test_() ->
48+
[
49+
?_assertEqual(
50+
asm(<<16#d100e0e7:32/little>>, "sub x7, x7, #56"), jit_aarch64_asm:sub(r7, r7, 56)
51+
),
52+
?_assertEqual(
53+
asm(<<16#d1000000:32/little>>, "sub x0, x0, #0"), jit_aarch64_asm:sub(r0, r0, 0)
54+
),
55+
?_assertEqual(
56+
asm(<<16#d1000421:32/little>>, "sub x1, x1, #1"), jit_aarch64_asm:sub(r1, r1, 1)
57+
),
58+
?_assertEqual(
59+
asm(<<16#cb031041:32/little>>, "sub x1, x2, x3, lsl #4"),
60+
jit_aarch64_asm:sub(r1, r2, r3, {lsl, 4})
61+
),
62+
?_assertEqual(
63+
asm(<<16#cb030041:32/little>>, "sub x1, x2, x3"), jit_aarch64_asm:sub(r1, r2, r3)
64+
)
65+
].
66+
67+
madd_test_() ->
68+
[
69+
?_assertEqual(
70+
asm(<<16#9b037c41:32/little>>, "mul x1, x2, x3"), jit_aarch64_asm:mul(r1, r2, r3)
71+
),
72+
?_assertEqual(
73+
asm(<<16#9b031041:32/little>>, "madd x1, x2, x3, x4"),
74+
jit_aarch64_asm:madd(r1, r2, r3, r4)
75+
)
3376
].
3477

3578
b_test_() ->

tests/libs/jit/jit_aarch64_tests.erl

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -948,6 +948,81 @@ move_to_native_register_test_() ->
948948
end)
949949
]
950950
end}.
951+
952+
mul_test0(State0, Reg, Imm, Dump) ->
953+
State1 = ?BACKEND:mul(State0, Reg, Imm),
954+
Stream = ?BACKEND:stream(State1),
955+
?assertEqual(dump_to_bin(Dump), Stream).
956+
957+
mul_test_() ->
958+
{setup,
959+
fun() ->
960+
?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
961+
end,
962+
fun(State0) ->
963+
[
964+
?_test(begin
965+
mul_test0(State0, r2, 2, <<
966+
"0: d37ff842 lsl x2, x2, #1"
967+
>>)
968+
end),
969+
?_test(begin
970+
mul_test0(State0, r2, 3, <<
971+
" 0: d37ff847 lsl x7, x2, #1\n"
972+
" 4: 8b0200e2 add x2, x7, x2"
973+
>>)
974+
end),
975+
?_test(begin
976+
mul_test0(State0, r2, 4, <<
977+
"0: d37ef442 lsl x2, x2, #2"
978+
>>)
979+
end),
980+
?_test(begin
981+
mul_test0(State0, r2, 5, <<
982+
" 0: d37ef447 lsl x7, x2, #2\n"
983+
" 4: 8b0200e2 add x2, x7, x2"
984+
>>)
985+
end),
986+
?_test(begin
987+
mul_test0(State0, r2, 6, <<
988+
" 0: d37ff847 lsl x7, x2, #1\n"
989+
" 4: 8b0200e2 add x2, x7, x2\n"
990+
" 8: d37ff842 lsl x2, x2, #1"
991+
>>)
992+
end),
993+
?_test(begin
994+
mul_test0(State0, r2, 7, <<
995+
" 0: d37df047 lsl x7, x2, #3\n"
996+
" 4: cb0200e2 sub x2, x7, x2"
997+
>>)
998+
end),
999+
?_test(begin
1000+
mul_test0(State0, r2, 8, <<
1001+
"0: d37df042 lsl x2, x2, #3"
1002+
>>)
1003+
end),
1004+
?_test(begin
1005+
mul_test0(State0, r2, 9, <<
1006+
" 0: d37df047 lsl x7, x2, #3\n"
1007+
" 4: 8b0200e2 add x2, x7, x2"
1008+
>>)
1009+
end),
1010+
?_test(begin
1011+
mul_test0(State0, r2, 10, <<
1012+
" 0: d37ef447 lsl x7, x2, #2\n"
1013+
" 4: 8b0200e2 add x2, x7, x2\n"
1014+
" 8: d37ff842 lsl x2, x2, #1"
1015+
>>)
1016+
end),
1017+
?_test(begin
1018+
mul_test0(State0, r2, 11, <<
1019+
" 0: d2800167 mov x7, #0xb // #11\n"
1020+
" 4: 9b077c42 mul x2, x2, x7"
1021+
>>)
1022+
end)
1023+
]
1024+
end}.
1025+
9511026
dump_to_bin(Dump) ->
9521027
dump_to_bin0(Dump, addr, []).
9531028

0 commit comments

Comments
 (0)