Skip to content

Commit 69d5f92

Browse files
committed
AArch64: Fix bug in remaining reductions handling
Signed-off-by: Paul Guyot <[email protected]>
1 parent 4df5f16 commit 69d5f92

File tree

4 files changed

+64
-25
lines changed

4 files changed

+64
-25
lines changed

libs/jit/src/jit_aarch64.erl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1836,11 +1836,11 @@ decrement_reductions_and_maybe_schedule_next(
18361836
#state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0
18371837
) ->
18381838
% Load reduction count
1839-
I1 = jit_aarch64_asm:ldr(Temp, ?JITSTATE_REDUCTIONCOUNT),
1839+
I1 = jit_aarch64_asm:ldr_w(Temp, ?JITSTATE_REDUCTIONCOUNT),
18401840
% Decrement reduction count
18411841
I2 = jit_aarch64_asm:subs(Temp, Temp, 1),
18421842
% Store back the decremented value
1843-
I3 = jit_aarch64_asm:str(Temp, ?JITSTATE_REDUCTIONCOUNT),
1843+
I3 = jit_aarch64_asm:str_w(Temp, ?JITSTATE_REDUCTIONCOUNT),
18441844
Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
18451845
BNEOffset = StreamModule:offset(Stream1),
18461846
% Branch if reduction count is not zero
@@ -1879,11 +1879,11 @@ call_only_or_schedule_next(
18791879
Label
18801880
) ->
18811881
% Load reduction count
1882-
I1 = jit_aarch64_asm:ldr(Temp, ?JITSTATE_REDUCTIONCOUNT),
1882+
I1 = jit_aarch64_asm:ldr_w(Temp, ?JITSTATE_REDUCTIONCOUNT),
18831883
% Decrement reduction count
18841884
I2 = jit_aarch64_asm:subs(Temp, Temp, 1),
18851885
% Store back the decremented value
1886-
I3 = jit_aarch64_asm:str(Temp, ?JITSTATE_REDUCTIONCOUNT),
1886+
I3 = jit_aarch64_asm:str_w(Temp, ?JITSTATE_REDUCTIONCOUNT),
18871887
Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
18881888
BNEOffset = StreamModule:offset(Stream1),
18891889
% Branch to label if reduction count is not zero

libs/jit/src/jit_aarch64_asm.erl

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
ret/0,
4747
nop/0,
4848
str/2,
49+
str_w/2,
4950
str/3,
5051
tst/2,
5152
tst_w/2,
@@ -225,13 +226,11 @@ ldr_w(Dst, {BaseReg, Offset}) when
225226
is_atom(BaseReg),
226227
is_integer(Offset),
227228
Offset >= 0,
228-
Offset =< 32760,
229-
(Offset rem 8) =:= 0
229+
Offset =< 16380,
230+
(Offset rem 4) =:= 0
230231
->
231232
DstNum = reg_to_num(Dst),
232233
BaseRegNum = reg_to_num(BaseReg),
233-
%% AArch64 LDR (immediate) encoding for 64-bit: 11111001010iiiiiiiiiiibbbbbttttt
234-
%% 0xf9400000 | (Offset div 8) << 10 | BaseReg << 5 | Dst
235234
<<
236235
(16#B9400000 bor ((Offset div 4) bsl 10) bor (BaseRegNum bsl 5) bor DstNum):32/little
237236
>>.
@@ -588,6 +587,22 @@ str(Reg, {Base}, Imm) when
588587
BaseNum = reg_to_num(Base),
589588
<<(16#F8000400 bor ((Imm band 16#1FF) bsl 12) bor (BaseNum bsl 5) bor RegNum):32/little>>.
590589

590+
%% Emit a 32-bit store register (STR Wt) instruction with an unsigned immediate offset (0..16380, multiple of 4)
591+
-spec str_w(aarch64_gpr_register(), {aarch64_gpr_register(), integer()}) -> binary().
592+
str_w(Src, {BaseReg, Offset}) when
593+
is_atom(Src),
594+
is_atom(BaseReg),
595+
is_integer(Offset),
596+
Offset >= 0,
597+
Offset =< 16380,
598+
(Offset rem 4) =:= 0
599+
->
600+
SrcNum = reg_to_num(Src),
601+
BaseRegNum = reg_to_num(BaseReg),
602+
<<
603+
(16#B9000000 bor ((Offset div 4) bsl 10) bor (BaseRegNum bsl 5) bor SrcNum):32/little
604+
>>.
605+
591606
%% Emit a load register (LDR) instruction for 64-bit load from memory, with writeback
592607
-spec ldr
593608
(aarch64_gpr_register(), {aarch64_gpr_register(), integer()}, '!') -> binary();

tests/libs/jit/jit_aarch64_asm_tests.erl

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,30 @@ ldr_test_() ->
139139

140140
ldr_w_test_() ->
141141
[
142-
?_assertEqual(<<16#b9400821:32/little>>, jit_aarch64_asm:ldr_w(r1, {r1, 8})),
143-
?_assertEqual(<<16#b9406042:32/little>>, jit_aarch64_asm:ldr_w(r2, {r2, 96}))
142+
?_assertEqual(
143+
asm(<<16#b9400821:32/little>>, "ldr w1, [x1, 8]"), jit_aarch64_asm:ldr_w(r1, {r1, 8})
144+
),
145+
?_assertEqual(
146+
asm(<<16#b9406042:32/little>>, "ldr w2, [x2, 96]"), jit_aarch64_asm:ldr_w(r2, {r2, 96})
147+
),
148+
?_assertEqual(
149+
asm(<<16#b97ffc60:32/little>>, "ldr w0, [x3, 16380]"),
150+
jit_aarch64_asm:ldr_w(r0, {r3, 16380})
151+
)
152+
].
153+
154+
str_w_test_() ->
155+
[
156+
?_assertEqual(
157+
asm(<<16#b9000821:32/little>>, "str w1, [x1, 8]"), jit_aarch64_asm:str_w(r1, {r1, 8})
158+
),
159+
?_assertEqual(
160+
asm(<<16#b9006042:32/little>>, "str w2, [x2, 96]"), jit_aarch64_asm:str_w(r2, {r2, 96})
161+
),
162+
?_assertEqual(
163+
asm(<<16#b93ffc60:32/little>>, "str w0, [x3, 16380]"),
164+
jit_aarch64_asm:str_w(r0, {r3, 16380})
165+
)
144166
].
145167

146168
ldr_d_test_() ->

tests/libs/jit/jit_aarch64_tests.erl

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ call_primitive_0_test() ->
4141
Stream = ?BACKEND:stream(State1),
4242
Dump =
4343
<<
44-
"0: f9400050 ldr x16, [x2]\n"
44+
" 0: f9400050 ldr x16, [x2]\n"
4545
" 4: a9bf03fe stp x30, x0, [sp, #-16]!\n"
4646
" 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
4747
" c: d63f0200 blr x16\n"
@@ -58,7 +58,7 @@ call_primitive_1_test() ->
5858
Stream = ?BACKEND:stream(State1),
5959
Dump =
6060
<<
61-
"0: f9400450 ldr x16, [x2, #8]\n"
61+
" 0: f9400450 ldr x16, [x2, #8]\n"
6262
" 4: a9bf03fe stp x30, x0, [sp, #-16]!\n"
6363
" 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
6464
" c: d63f0200 blr x16\n"
@@ -149,20 +149,22 @@ call_primitive_extended_regs_test() ->
149149
call_ext_only_test() ->
150150
State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
151151
State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
152-
State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, -1]),
152+
State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, 2, 2, -1]),
153153
Stream = ?BACKEND:stream(State2),
154154
Dump = <<
155-
" 0: f9400827 ldr x7, [x1, #16]\n"
155+
" 0: b9401027 ldr w7, [x1, #16]\n"
156156
" 4: f10004e7 subs x7, x7, #0x1\n"
157-
" 8: f9000827 str x7, [x1, #16]\n"
157+
" 8: b9001027 str w7, [x1, #16]\n"
158158
" c: 540000a1 b.ne 0x20 // b.any\n"
159159
" 10: 10000087 adr x7, 0x20\n"
160160
" 14: f9000427 str x7, [x1, #8]\n"
161161
" 18: f9400847 ldr x7, [x2, #16]\n"
162162
" 1c: d61f00e0 br x7\n"
163163
" 20: f9401047 ldr x7, [x2, #32]\n"
164-
" 24: 92800002 mov x2, #0xffffffffffffffff // #-1\n"
165-
" 28: d61f00e0 br x7"
164+
" 24: d2800042 mov x2, #0x2 // #2\n"
165+
" 28: d2800043 mov x3, #0x2 // #2\n"
166+
" 2c: 92800004 mov x4, #0xffffffffffffffff // #-1\n"
167+
" 30: d61f00e0 br x7"
166168
>>,
167169
?assertEqual(dump_to_bin(Dump), Stream).
168170

@@ -172,9 +174,9 @@ call_ext_last_test() ->
172174
State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, 2, 2, 10]),
173175
Stream = ?BACKEND:stream(State2),
174176
Dump = <<
175-
" 0: f9400827 ldr x7, [x1, #16]\n"
177+
" 0: b9401027 ldr w7, [x1, #16]\n"
176178
" 4: f10004e7 subs x7, x7, #0x1\n"
177-
" 8: f9000827 str x7, [x1, #16]\n"
179+
" 8: b9001027 str w7, [x1, #16]\n"
178180
" c: 540000a1 b.ne 0x20 // b.any\n"
179181
" 10: 10000087 adr x7, 0x20\n"
180182
" 14: f9000427 str x7, [x1, #8]\n"
@@ -241,9 +243,9 @@ call_only_or_schedule_next_and_label_relocation_test() ->
241243
" 0: 1400000d b 0x34\n"
242244
" 4: 14000002 b 0xc\n"
243245
" 8: 14000009 b 0x2c\n"
244-
" c: f9400827 ldr x7, [x1, #16]\n"
246+
" c: b9401027 ldr w7, [x1, #16]\n"
245247
" 10: f10004e7 subs x7, x7, #0x1\n"
246-
" 14: f9000827 str x7, [x1, #16]\n"
248+
" 14: b9001027 str w7, [x1, #16]\n"
247249
" 18: 540000a1 b.ne 0x2c // b.any\n"
248250
" 1c: 10000087 adr x7, 0x2c\n"
249251
" 20: f9000427 str x7, [x1, #8]\n"
@@ -413,9 +415,9 @@ call_ext_test() ->
413415
?BACKEND:assert_all_native_free(State2),
414416
Stream = ?BACKEND:stream(State2),
415417
Dump = <<
416-
" 0: f9400827 ldr x7, [x1, #16]\n"
418+
" 0: b9401027 ldr w7, [x1, #16]\n"
417419
" 4: f10004e7 subs x7, x7, #0x1\n"
418-
" 8: f9000827 str x7, [x1, #16]\n"
420+
" 8: b9001027 str w7, [x1, #16]\n"
419421
" c: 540000a1 b.ne 0x20 // b.any\n"
420422
" 10: 10000087 adr x7, 0x20\n"
421423
" 14: f9000427 str x7, [x1, #8]\n"
@@ -465,9 +467,9 @@ call_fun_test() ->
465467
?BACKEND:assert_all_native_free(State9),
466468
Stream = ?BACKEND:stream(State9),
467469
Dump = <<
468-
" 0: f9400827 ldr x7, [x1, #16]\n"
470+
" 0: b9401027 ldr w7, [x1, #16]\n"
469471
" 4: f10004e7 subs x7, x7, #0x1\n"
470-
" 8: f9000827 str x7, [x1, #16]\n"
472+
" 8: b9001027 str w7, [x1, #16]\n"
471473
" c: 540000a1 b.ne 0x20 // b.any\n"
472474
" 10: 10000087 adr x7, 0x20\n"
473475
" 14: f9000427 str x7, [x1, #8]\n"

0 commit comments

Comments
 (0)