Skip to content

Commit a3e9d17

Browse files
committed
AArch64: Fix bug in remaining reductions handling
Signed-off-by: Paul Guyot <[email protected]>
1 parent 8dd841e commit a3e9d17

File tree

3 files changed

+47
-10
lines changed

3 files changed

+47
-10
lines changed

libs/jit/src/jit_aarch64.erl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1836,11 +1836,11 @@ decrement_reductions_and_maybe_schedule_next(
18361836
#state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0
18371837
) ->
18381838
% Load reduction count
1839-
I1 = jit_aarch64_asm:ldr(Temp, ?JITSTATE_REDUCTIONCOUNT),
1839+
I1 = jit_aarch64_asm:ldr_w(Temp, ?JITSTATE_REDUCTIONCOUNT),
18401840
% Decrement reduction count
18411841
I2 = jit_aarch64_asm:subs(Temp, Temp, 1),
18421842
% Store back the decremented value
1843-
I3 = jit_aarch64_asm:str(Temp, ?JITSTATE_REDUCTIONCOUNT),
1843+
I3 = jit_aarch64_asm:str_w(Temp, ?JITSTATE_REDUCTIONCOUNT),
18441844
Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
18451845
BNEOffset = StreamModule:offset(Stream1),
18461846
% Branch if reduction count is not zero
@@ -1879,11 +1879,11 @@ call_only_or_schedule_next(
18791879
Label
18801880
) ->
18811881
% Load reduction count
1882-
I1 = jit_aarch64_asm:ldr(Temp, ?JITSTATE_REDUCTIONCOUNT),
1882+
I1 = jit_aarch64_asm:ldr_w(Temp, ?JITSTATE_REDUCTIONCOUNT),
18831883
% Decrement reduction count
18841884
I2 = jit_aarch64_asm:subs(Temp, Temp, 1),
18851885
% Store back the decremented value
1886-
I3 = jit_aarch64_asm:str(Temp, ?JITSTATE_REDUCTIONCOUNT),
1886+
I3 = jit_aarch64_asm:str_w(Temp, ?JITSTATE_REDUCTIONCOUNT),
18871887
Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
18881888
BNEOffset = StreamModule:offset(Stream1),
18891889
% Branch to label if reduction count is not zero

libs/jit/src/jit_aarch64_asm.erl

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
ret/0,
4747
nop/0,
4848
str/2,
49+
str_w/2,
4950
str/3,
5051
tst/2,
5152
tst_w/2,
@@ -225,13 +226,11 @@ ldr_w(Dst, {BaseReg, Offset}) when
225226
is_atom(BaseReg),
226227
is_integer(Offset),
227228
Offset >= 0,
228-
Offset =< 32760,
229-
(Offset rem 8) =:= 0
229+
Offset =< 16380,
230+
(Offset rem 4) =:= 0
230231
->
231232
DstNum = reg_to_num(Dst),
232233
BaseRegNum = reg_to_num(BaseReg),
233-
%% AArch64 LDR (immediate) encoding for 64-bit: 11111001010iiiiiiiiiiibbbbbttttt
234-
%% 0xf9400000 | (Offset div 8) << 10 | BaseReg << 5 | Dst
235234
<<
236235
(16#B9400000 bor ((Offset div 4) bsl 10) bor (BaseRegNum bsl 5) bor DstNum):32/little
237236
>>.
@@ -588,6 +587,22 @@ str(Reg, {Base}, Imm) when
588587
BaseNum = reg_to_num(Base),
589588
<<(16#F8000400 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>.
590589

590+
%% Emit a store register (STR) instruction for a 32-bit store to memory at
%% base register + unsigned immediate offset (multiple of 4, from 0 to 16380)
591+
-spec str_w(aarch64_gpr_register(), {aarch64_gpr_register(), integer()}) -> binary().
592+
str_w(Src, {BaseReg, Offset}) when
593+
is_atom(Src),
594+
is_atom(BaseReg),
595+
is_integer(Offset),
596+
Offset >= 0,
597+
Offset =< 16380,
598+
(Offset rem 4) =:= 0
599+
->
600+
SrcNum = reg_to_num(Src),
601+
BaseRegNum = reg_to_num(BaseReg),
602+
<<
603+
(16#B9000000 bor ((Offset div 4) bsl 10) bor (BaseRegNum bsl 5) bor SrcNum):32/little
604+
>>.
605+
591606
%% Emit a load register (LDR) instruction for 64-bit load from memory, with writeback
592607
-spec ldr
593608
(aarch64_gpr_register(), {aarch64_gpr_register(), integer()}, '!') -> binary();

tests/libs/jit/jit_aarch64_asm_tests.erl

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,30 @@ ldr_test_() ->
139139

140140
ldr_w_test_() ->
141141
[
142-
?_assertEqual(<<16#b9400821:32/little>>, jit_aarch64_asm:ldr_w(r1, {r1, 8})),
143-
?_assertEqual(<<16#b9406042:32/little>>, jit_aarch64_asm:ldr_w(r2, {r2, 96}))
142+
?_assertEqual(
143+
asm(<<16#b9400821:32/little>>, "ldr w1, [x1, 8]"), jit_aarch64_asm:ldr_w(r1, {r1, 8})
144+
),
145+
?_assertEqual(
146+
asm(<<16#b9406042:32/little>>, "ldr w2, [x2, 96]"), jit_aarch64_asm:ldr_w(r2, {r2, 96})
147+
),
148+
?_assertEqual(
149+
asm(<<16#b97ffc60:32/little>>, "ldr w0, [x3, 16380]"),
150+
jit_aarch64_asm:ldr_w(r0, {r3, 16380})
151+
)
152+
].
153+
154+
str_w_test_() ->
155+
[
156+
?_assertEqual(
157+
asm(<<16#b9000821:32/little>>, "str w1, [x1, 8]"), jit_aarch64_asm:str_w(r1, {r1, 8})
158+
),
159+
?_assertEqual(
160+
asm(<<16#b9006042:32/little>>, "str w2, [x2, 96]"), jit_aarch64_asm:str_w(r2, {r2, 96})
161+
),
162+
?_assertEqual(
163+
asm(<<16#b93ffc60:32/little>>, "str w0, [x3, 16380]"),
164+
jit_aarch64_asm:str_w(r0, {r3, 16380})
165+
)
144166
].
145167

146168
ldr_d_test_() ->

0 commit comments

Comments
 (0)