diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml index c0569ec05b..f2e48005a3 100644 --- a/.github/workflows/build-and-test.yaml +++ b/.github/workflows/build-and-test.yaml @@ -366,6 +366,33 @@ jobs: arch: "s390x" library-arch: s390x-linux-gnu + # riscv32-ilp32 build + - os: "ubuntu-24.04" + cc: "riscv32-unknown-linux-gnu-gcc" + cxx: "riscv32-unknown-linux-gnu-g++" + cflags: "-O2" + otp: "28" + elixir_version: "1.17" + rebar3_version: "3.24.0" + cmake_opts_other: "-DAVM_WARNINGS_ARE_ERRORS=ON -DCMAKE_TOOLCHAIN_FILE=${RUNNER_TEMP}/riscv32_ilp32_toolchain.cmake" + compiler_pkgs: "qemu-user qemu-user-binfmt binfmt-support" + arch: "riscv32" + library-arch: riscv32-linux-gnu-ilp32 + + # riscv32-ilp32 build + jit + - os: "ubuntu-24.04" + cc: "riscv32-unknown-linux-gnu-gcc" + cxx: "riscv32-unknown-linux-gnu-g++" + cflags: "-O2" + otp: "28" + elixir_version: "1.17" + rebar3_version: "3.24.0" + cmake_opts_other: "-DAVM_DISABLE_JIT=OFF -DAVM_JIT_TARGET_ARCH=riscv32 -DCMAKE_TOOLCHAIN_FILE=${RUNNER_TEMP}/riscv32_ilp32_toolchain.cmake" + compiler_pkgs: "qemu-user qemu-user-binfmt binfmt-support" + arch: "riscv32" + library-arch: riscv32-linux-gnu-ilp32 + jit_target_arch: "riscv32" + env: ImageOS: ${{ matrix.container == 'ubuntu:20.04' && 'ubuntu20' || matrix.os == 'ubuntu-20.04' && 'ubuntu20' || matrix.os == 'ubuntu-22.04' && 'ubuntu22' || matrix.os == 'ubuntu-24.04' && 'ubuntu24' || 'ubuntu24' }} CC: ${{ matrix.cc }} @@ -386,7 +413,7 @@ jobs: run: sudo dpkg --add-architecture i386 - name: "Setup cross compilation architecture" - if: matrix.library-arch != '' + if: matrix.library-arch != '' && matrix.library-arch != 'riscv32-linux-gnu-ilp32' run: | # Replace Azure mirrors with official Ubuntu repositories sudo sed -i 's|azure\.||g' /etc/apt/sources.list @@ -420,6 +447,97 @@ jobs: set(MBEDTLS_LIBRARIES_DIR /usr/lib/${{ matrix.library-arch }}) EOF + - name: "Setup cross compilation architecture (riscv32)" + if: matrix.library-arch == 
'riscv32-linux-gnu-ilp32' + run: | + sudo dpkg --add-architecture ${{ matrix.arch }} + + # Download toolchain and libraries from release + gh release download riscv-toolchain-2025.10.18 \ + -R pguyot/crossbuild-essential-riscv32 \ + --pattern 'riscv32-gnu-toolchain-ilp32_2025.10.18_amd64.deb' \ + --pattern 'libc6-ilp32_2.39-0ubuntu1_riscv32.deb' \ + --pattern 'libc6-dev-ilp32_2.39-0ubuntu1_riscv32.deb' \ + --pattern 'libc6-dbg-ilp32_2.39-0ubuntu1_riscv32.deb' \ + --pattern 'zlib1g-ilp32_1.3.1-0ubuntu1_riscv32.deb' \ + --pattern 'zlib1g-dev-ilp32_1.3.1-0ubuntu1_riscv32.deb' \ + --pattern 'libmbedcrypto7-ilp32_2.28.8-0ubuntu1_riscv32.deb' \ + --pattern 'libmbedtls-dev-ilp32_2.28.8-0ubuntu1_riscv32.deb' \ + --pattern 'libmbedtls14-ilp32_2.28.8-0ubuntu1_riscv32.deb' \ + --pattern 'libmbedx509-1-ilp32_2.28.8-0ubuntu1_riscv32.deb' + + # Install the toolchain + sudo dpkg -i riscv32-gnu-toolchain-ilp32_2025.10.18_amd64.deb + + # Add to PATH for all subsequent steps + echo "/opt/riscv32-ilp32/bin" >> $GITHUB_PATH + + # Install the libs + sudo dpkg -i libc6-ilp32_2.39-0ubuntu1_riscv32.deb + sudo dpkg -i libc6-dev-ilp32_2.39-0ubuntu1_riscv32.deb + sudo dpkg -i libc6-dbg-ilp32_2.39-0ubuntu1_riscv32.deb + + sudo dpkg -i zlib1g-ilp32_1.3.1-0ubuntu1_riscv32.deb + sudo dpkg -i zlib1g-dev-ilp32_1.3.1-0ubuntu1_riscv32.deb + + # Install mbedtls runtime packages first (in dependency order) + sudo dpkg -i libmbedcrypto7-ilp32_2.28.8-0ubuntu1_riscv32.deb + sudo dpkg -i libmbedx509-1-ilp32_2.28.8-0ubuntu1_riscv32.deb + sudo dpkg -i libmbedtls14-ilp32_2.28.8-0ubuntu1_riscv32.deb + # Then install the dev package + sudo dpkg -i libmbedtls-dev-ilp32_2.28.8-0ubuntu1_riscv32.deb + + sudo sed -i '/Types: deb/a Architectures: amd64' /etc/apt/sources.list.d/ubuntu.sources + + cat > ${RUNNER_TEMP}/riscv32_ilp32_toolchain.cmake <<'EOF' + # Toolchain file for RISC-V32 ILP32 (RV32-IMAC) cross-compilation + set(CMAKE_SYSTEM_NAME Linux) + set(CMAKE_SYSTEM_PROCESSOR riscv32) + 
set(CMAKE_C_LIBRARY_ARCHITECTURE riscv32-linux-gnu-ilp32) + + # Specify the cross compiler + set(CMAKE_C_COMPILER riscv32-unknown-linux-gnu-gcc) + set(CMAKE_CXX_COMPILER riscv32-unknown-linux-gnu-g++) + + # Specify the target architecture (seeds CMAKE_<LANG>_FLAGS once; no FORCE, as this file is re-read by try_compile) + set(CMAKE_C_FLAGS_INIT "-march=rv32imac -mabi=ilp32") + set(CMAKE_CXX_FLAGS_INIT "-march=rv32imac -mabi=ilp32") + + # Set up paths for cross-compiled libraries + set(ZLIB_LIBRARY /usr/lib/riscv32-linux-gnu-ilp32/libz.so CACHE FILEPATH "") + set(ZLIB_INCLUDE_DIR /usr/include/riscv32-linux-gnu CACHE PATH "") + set(ZLIB_FOUND TRUE CACHE BOOL "") + + # MbedTLS configuration + set(MBEDTLS_ROOT_DIR /usr) + set(MBEDTLS_LIBRARIES_DIR /usr/lib/riscv32-linux-gnu-ilp32) + + # Add cross-compilation include path to compiler flags + include_directories(SYSTEM /usr/include/riscv32-linux-gnu) + + # Search for programs in the build host directories + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + + # Search for libraries and headers in the target directories + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) + EOF + + # Set up qemu-user binfmt to find libraries + sudo ln -s /opt/riscv32-ilp32/sysroot/lib/ld-linux-riscv32-ilp32.so.1 /lib/ld-linux-riscv32-ilp32.so.1 + sudo mkdir -p /usr/gnemul + sudo ln -s /opt/riscv32-ilp32/sysroot /usr/gnemul/qemu-riscv32 + + # Copy cross-compiled libraries to sysroot for qemu-user + sudo cp /usr/lib/${{ matrix.library-arch }}/libz.so.1* /opt/riscv32-ilp32/sysroot/lib/ + sudo cp /usr/lib/${{ matrix.library-arch }}/libmbedtls.so.14 /opt/riscv32-ilp32/sysroot/lib/ + sudo cp /usr/lib/${{ matrix.library-arch }}/libmbedcrypto.so.7 /opt/riscv32-ilp32/sysroot/lib/ + sudo cp /usr/lib/${{ matrix.library-arch }}/libmbedx509.so.1 /opt/riscv32-ilp32/sysroot/lib/ + + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: "APT update" run: sudo apt update -y @@ -630,7
+748,7 @@ jobs: valgrind --error-exitcode=1 ./src/AtomVM tests/libs/jit/test_jit.avm - name: "Test: test_jit.avm" - timeout-minutes: 20 + timeout-minutes: 60 if: matrix.otp != '21' && matrix.otp != '22' working-directory: build run: | diff --git a/.github/workflows/pico-build.yaml b/.github/workflows/pico-build.yaml index 9cf01d045a..4f71a02bc3 100644 --- a/.github/workflows/pico-build.yaml +++ b/.github/workflows/pico-build.yaml @@ -41,12 +41,30 @@ jobs: strategy: matrix: board: ["pico", "pico_w", "pico2"] - language: ["cpp"] + platform: [""] + jit: ["", "-DAVM_DISABLE_JIT=OFF"] + include: + - board: "pico2" + platform: "-DPICO_PLATFORM=rp2350-riscv" + jit: "" + + - board: "pico2" + platform: "-DPICO_PLATFORM=rp2350-riscv" + jit: "-DAVM_DISABLE_JIT=OFF" steps: - name: Checkout repo uses: actions/checkout@v4 + - uses: erlef/setup-beam@v1 + with: + otp-version: "28.1" + rebar3-version: "3.24.0" + hexpm-mirrors: | + https://builds.hex.pm + https://repo.hex.pm + https://cdn.jsdelivr.net/hex + - name: "apt update" run: sudo apt update @@ -54,8 +72,17 @@ jobs: run: | sudo apt install -y \ cmake doxygen gperf ninja-build gcc-arm-none-eabi \ - libnewlib-arm-none-eabi libstdc++-arm-none-eabi-newlib \ - erlang-base erlang-dev erlang-dialyzer erlang-eunit rebar3 + libnewlib-arm-none-eabi libstdc++-arm-none-eabi-newlib + + - name: Install riscv32 toolchain + if: matrix.platform == '-DPICO_PLATFORM=rp2350-riscv' + run: | + sudo mkdir -p /opt/riscv32-toolchain + cd /opt/riscv32-toolchain + sudo wget -q https://github.com/raspberrypi/pico-sdk-tools/releases/download/v2.2.0-3/riscv-toolchain-15-x86_64-lin.tar.gz + sudo tar xzf riscv-toolchain-15-x86_64-lin.tar.gz + sudo rm riscv-toolchain-15-x86_64-lin.tar.gz + echo "/opt/riscv32-toolchain/bin" >> $GITHUB_PATH - name: "Git config safe.directory for codeql" run: git config --global --add safe.directory /__w/AtomVM/AtomVM @@ -63,7 +90,7 @@ jobs: - name: "Initialize CodeQL" uses: github/codeql-action/init@v3 with: - languages: 
${{matrix.language}} + languages: "cpp" build-mode: manual queries: +./code-queries/term-to-non-term-func.ql,./code-queries/non-term-to-term-func.ql @@ -74,7 +101,7 @@ jobs: set -euo pipefail mkdir build cd build - cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} + cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} ${{ matrix.platform }} ${{ matrix.jit }} ninja - name: "Perform CodeQL Analysis" @@ -97,7 +124,7 @@ jobs: mkdir build.nosmp cd build.nosmp # TODO: fix all warnings and enable -DAVM_WARNINGS_ARE_ERRORS=ON - cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} -DAVM_DISABLE_SMP=1 + cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} ${{ matrix.jit }} -DAVM_DISABLE_SMP=1 cmake --build . --target=rp2_tests - name: Run tests with rp2040js @@ -112,7 +139,7 @@ jobs: npx tsx run-tests.ts ../build.nosmp/tests/rp2_tests.uf2 ../build.nosmp/tests/test_erl_sources/rp2_test_modules.uf2 - name: Build atomvmlib.uf2 - if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == '' shell: bash run: | set -euo pipefail @@ -122,7 +149,7 @@ jobs: make atomvmlib-${{ matrix.board }}.uf2 - name: Rename AtomVM and write sha256sum - if: startsWith(github.ref, 'refs/tags/') + if: startsWith(github.ref, 'refs/tags/') && matrix.platform == '' && matrix.jit == '' shell: bash run: | pushd src/platforms/rp2/build @@ -137,7 +164,7 @@ jobs: popd - name: Rename atomvmlib and write sha256sum - if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == '' shell: bash run: | pushd build/libs @@ -148,7 +175,7 @@ jobs: - name: Release (Pico & Pico2) uses: softprops/action-gh-release@v1 - if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && 
matrix.jit == '' with: draft: true fail_on_unmatched_files: true @@ -160,7 +187,7 @@ jobs: - name: Release (PicoW) uses: softprops/action-gh-release@v1 - if: startsWith(github.ref, 'refs/tags/') && matrix.board == 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board == 'pico_w' && matrix.platform == '' && matrix.jit == '' with: draft: true fail_on_unmatched_files: true diff --git a/CMakeLists.txt b/CMakeLists.txt index 32484ee851..307917422f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,7 +64,7 @@ if (NOT AVM_DISABLE_JIT AND NOT DEFINED AVM_JIT_TARGET_ARCH) endif() endif() -set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;armv6m;armv6m+float32" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON") +set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;armv6m;armv6m+float32;riscv32" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON") if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") OR (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") OR diff --git a/doc/src/atomvm-internals.md b/doc/src/atomvm-internals.md index 62ad888a71..4d30e6767d 100644 --- a/doc/src/atomvm-internals.md +++ b/doc/src/atomvm-internals.md @@ -137,7 +137,7 @@ Following BEAM, there are two flavors of the emulator: jit and emu, but eventual - Native: the VM only runs native code and all code must be precompiled on the desktop using the JIT compiler (which effectively is a AOT or Ahead-of-Time compiler). In this mode, it is not necessary to bundle the jit compiler on the embedded target. - Hybrid: the VM can run native code as well as emulated BEAM code and some code is precompiled on the desktop. -JIT is available on some platforms (currently only x86_64, aarch64 and armv6m) and compiles Erlang bytecode at runtime. Erlang bytecode is never interpreted. EMU is available on all platforms and Erlang bytecode is interpreted. 
+JIT is available on some platforms (currently x86_64, aarch64, armv6m and riscv32) and compiles Erlang bytecode at runtime. Erlang bytecode is never interpreted. EMU is available on all platforms and Erlang bytecode is interpreted. Modules can include precompiled code in a dedicated beam chunk with name 'avmN'. The chunk can contain native code for several architectures, however it may only contain native code for a given version of the native interface. Current version is 1. This native code is executed by the jit-flavor of the emulator as well as the emu flavor if execution of precompiled is enabled. @@ -154,9 +154,16 @@ The JIT compiler is written in Erlang and is therefore precompiled. When a proce JIT compiler is composed of two main interfaces : backend and stream. -A backend implementation is required for each architecture. The backend is called by jit module as it translates bytecodes to machine code. The current implementations are `jit_x86_64` and `jit_aarch64` which are suitable for systems with System V X86 64 ABI or AArch64 ABI. +A backend implementation is required for each architecture. The backend is called by jit module as it translates bytecodes to machine code. The current implementations are : +- `jit_x86_64` for System V X86 64 ABI +- `jit_aarch64` for AArch64 ABI +- `jit_armv6m` for AArch32 ABI +- `jit_riscv32` for rv32imc ilp32 ABI. -A stream implementation is responsible for streaming the machine code, especially in the context of low memory. Two implementations currently exist: `jit_stream_binary` that streams assembly code to an Erlang binary, suitable for tests and precompilation on the desktop, and `jit_stream_mmap` that streams assembly code in an `mmap(2)` allocated page, suitable for JIT compilation on Unix. +A stream implementation is responsible for streaming the machine code, especially in the context of low memory. 
Three implementations currently exist: +- `jit_stream_binary` that streams assembly code to an Erlang binary, suitable for tests and precompilation on the desktop +- `jit_stream_mmap` that streams assembly code in an `mmap(2)` allocated page, suitable for JIT compilation on Unix +- `jit_stream_flash` available on Pico that allows for embedded JIT. ### Embedded JIT and Native diff --git a/libs/jit/include/jit.hrl b/libs/jit/include/jit.hrl index 0989628fd3..0c1c8fb248 100644 --- a/libs/jit/include/jit.hrl +++ b/libs/jit/include/jit.hrl @@ -27,6 +27,7 @@ -define(JIT_ARCH_X86_64, 1). -define(JIT_ARCH_AARCH64, 2). -define(JIT_ARCH_ARMV6M, 3). +-define(JIT_ARCH_RISCV32, 4). -define(JIT_VARIANT_PIC, 1). -define(JIT_VARIANT_FLOAT32, 2). diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt index 1b00db5238..88658b2796 100644 --- a/libs/jit/src/CMakeLists.txt +++ b/libs/jit/src/CMakeLists.txt @@ -32,6 +32,8 @@ set(ERLANG_MODULES jit_aarch64_asm jit_armv6m jit_armv6m_asm + jit_riscv32 + jit_riscv32_asm jit_x86_64 jit_x86_64_asm ) diff --git a/libs/jit/src/jit_precompile.erl b/libs/jit/src/jit_precompile.erl index cd9646790d..930b79dc37 100644 --- a/libs/jit/src/jit_precompile.erl +++ b/libs/jit/src/jit_precompile.erl @@ -93,6 +93,7 @@ compile(Target, Dir, Path) -> "x86_64" -> ?JIT_ARCH_X86_64; "aarch64" -> ?JIT_ARCH_AARCH64; "armv6m" -> ?JIT_ARCH_ARMV6M; + "riscv32" -> ?JIT_ARCH_RISCV32; _ -> error({unsupported_target, Target}) end, diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl new file mode 100644 index 0000000000..4cfb9a8f65 --- /dev/null +++ b/libs/jit/src/jit_riscv32.erl @@ -0,0 +1,3066 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. 
+% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_riscv32). + +-export([ + word_size/0, + new/3, + stream/1, + offset/1, + flush/1, + debugger/1, + used_regs/1, + available_regs/1, + free_native_registers/2, + assert_all_native_free/1, + jump_table/2, + update_branches/1, + call_primitive/3, + call_primitive_last/3, + call_primitive_with_cp/3, + return_if_not_equal_to_ctx/2, + jump_to_label/2, + jump_to_continuation/2, + jump_to_offset/2, + if_block/3, + if_else_block/4, + shift_right/3, + shift_left/3, + move_to_vm_register/3, + move_to_native_register/2, + move_to_native_register/3, + move_to_cp/2, + move_array_element/4, + move_to_array_element/4, + move_to_array_element/5, + set_bs/2, + copy_to_native_register/2, + get_array_element/3, + increment_sp/2, + set_continuation_to_label/2, + set_continuation_to_offset/1, + continuation_entry_point/1, + get_module_index/1, + and_/3, + or_/3, + add/3, + sub/3, + mul/3, + decrement_reductions_and_maybe_schedule_next/1, + call_or_schedule_next/2, + call_only_or_schedule_next/2, + call_func_ptr/3, + return_labels_and_lines/2, + add_label/2, + add_label/3 +]). + +-ifdef(JIT_DWARF). +-export([ + dwarf_opcode/2, + dwarf_label/2, + dwarf_function/3, + dwarf_line/2 +]). +-endif. + +-compile([warnings_as_errors]). + +-include_lib("jit.hrl"). + +-include("primitives.hrl"). +-include("term.hrl"). + +-define(ASSERT(Expr), true = Expr). + +%% RISC-V32 ILP32 ABI: a0-a7 are used for argument passing (8 registers). +%% a0-a1 are used for return values (a0 for 32-bit, a0-a1 for 64-bit returns). 
+%% s0-s11 are callee-saved registers (must be preserved across calls).
+%% t0-t6 are caller-saved temporary registers.
+%% sp is the stack pointer.
+%% ra is the return address register.
+%% zero (x0) is hardwired to constant 0.
+%% This implementation uses RV32IMC (base + multiply/compressed extensions).
+%%
+%% See: RISC-V Calling Convention
+%% https://riscv.org/wp-content/uploads/2024/12/riscv-calling.pdf
+%%
+%% Registers used by the JIT backend (RISC-V32):
+%% - Argument/return: a0-a7 (up to 8 args in registers)
+%% - Callee-saved: s0-s11 (must preserve)
+%% - Temporaries: t0-t6 (caller-saved)
+%% - Stack pointer: sp
+%% - Return address: ra
+%% - Zero register: zero (always 0)
+%% - Available for JIT scratch: t0-t6 (7 temp registers)
+%%
+%% Note: base RV32I instructions are 32 bits wide; with the C extension the
+%% backend also emits 16-bit encodings, so offsets are only 2-byte aligned.
+%%
+%% For more details, refer to the RISC-V ILP32 Procedure Call Standard.
+
+-type riscv32_register() ::
+    a0
+    | a1
+    | a2
+    | a3
+    | a4
+    | a5
+    | a6
+    | a7
+    | t0
+    | t1
+    | t2
+    | t3
+    | t4
+    | t5
+    | t6
+    | s0
+    | s1
+    | s2
+    | s3
+    | s4
+    | s5
+    | s6
+    | s7
+    | s8
+    | s9
+    | s10
+    | s11
+    | sp
+    | ra.
+
+-define(IS_GPR(Reg),
+    (Reg =:= a0 orelse Reg =:= a1 orelse Reg =:= a2 orelse Reg =:= a3 orelse Reg =:= a4 orelse
+        Reg =:= a5 orelse Reg =:= a6 orelse Reg =:= a7 orelse Reg =:= t0 orelse Reg =:= t1 orelse
+        Reg =:= t2 orelse Reg =:= t3 orelse Reg =:= t4 orelse Reg =:= t5 orelse Reg =:= t6 orelse
+        Reg =:= s0 orelse Reg =:= s1 orelse Reg =:= s2 orelse Reg =:= s3 orelse Reg =:= s4 orelse
+        Reg =:= s5 orelse Reg =:= s6 orelse Reg =:= s7 orelse Reg =:= s8 orelse Reg =:= s9 orelse
+        Reg =:= s10 orelse Reg =:= s11 orelse Reg =:= sp orelse Reg =:= ra)
+).
+
+-type stream() :: any().
+ +-record(state, { + stream_module :: module(), + stream :: stream(), + offset :: non_neg_integer(), + branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], + available_regs :: [riscv32_register()], + used_regs :: [riscv32_register()], + labels :: [{integer() | reference(), integer()}], + variant :: non_neg_integer() +}). + +-type state() :: #state{}. +-type immediate() :: non_neg_integer(). +-type vm_register() :: + {x_reg, non_neg_integer()} | {y_reg, non_neg_integer()} | {ptr, riscv32_register()}. +-type value() :: immediate() | vm_register() | riscv32_register() | {ptr, riscv32_register()}. +-type arg() :: ctx | jit_state | offset | value() | {free, value()} | {avm_int64_t, integer()}. + +-type maybe_free_riscv32_register() :: + {free, riscv32_register()} | riscv32_register(). + +-type condition() :: + {riscv32_register(), '<', integer()} + | {maybe_free_riscv32_register(), '<', riscv32_register()} + | {maybe_free_riscv32_register(), '==', integer()} + | {maybe_free_riscv32_register(), '!=', riscv32_register() | integer()} + | {'(int)', maybe_free_riscv32_register(), '==', integer()} + | {'(int)', maybe_free_riscv32_register(), '!=', riscv32_register() | integer()} + | {'(bool)', maybe_free_riscv32_register(), '==', false} + | {'(bool)', maybe_free_riscv32_register(), '!=', false} + | {maybe_free_riscv32_register(), '&', non_neg_integer(), '!=', integer()} + | {{free, riscv32_register()}, '==', {free, riscv32_register()}}. + +% Context offsets (32-bit architecture) +% ctx->e is 0x14 +% ctx->x is 0x18 +-define(CTX_REG, a0). +-define(NATIVE_INTERFACE_REG, a2). +-define(Y_REGS, {?CTX_REG, 16#14}). +-define(X_REG(N), {?CTX_REG, 16#18 + (N * 4)}). +-define(CP, {?CTX_REG, 16#5C}). +-define(FP_REGS, {?CTX_REG, 16#60}). +-define(BS, {?CTX_REG, 16#64}). +-define(BS_OFFSET, {?CTX_REG, 16#68}). +% JITSTATE is in a1 register (no prolog, following aarch64 model) +-define(JITSTATE_REG, a1). 
+% Return address register (like LR in AArch64)
+-define(RA_REG, ra).
+-define(JITSTATE_MODULE_OFFSET, 0).
+-define(JITSTATE_CONTINUATION_OFFSET, 16#4).
+-define(JITSTATE_REDUCTIONCOUNT_OFFSET, 16#8).
+-define(PRIMITIVE(N), {?NATIVE_INTERFACE_REG, N * 4}).
+-define(MODULE_INDEX(ModuleReg), {ModuleReg, 0}).
+
+-define(JUMP_TABLE_ENTRY_SIZE, 8).
+
+%% RISC-V32 register mappings
+
+%% Use t3 as temporary for some operations (t3 is not listed in ?SCRATCH_REGS)
+-define(IP_REG, t3).
+
+-define(IS_SINT8_T(X), is_integer(X) andalso X >= -128 andalso X =< 127).
+-define(IS_SINT32_T(X), is_integer(X) andalso X >= -16#80000000 andalso X < 16#80000000).
+-define(IS_UINT8_T(X), is_integer(X) andalso X >= 0 andalso X =< 255).
+-define(IS_UINT32_T(X), is_integer(X) andalso X >= 0 andalso X < 16#100000000).
+-define(IS_SIGNED_OR_UNSIGNED_INT32_T(X),
+    is_integer(X) andalso X >= -16#80000000 andalso X < 16#100000000
+).
+
+%% RISC-V32 ILP32 ABI register allocation:
+%% - a0: context pointer (?CTX_REG), a1: jit state pointer (?JITSTATE_REG)
+%% - a2: native interface pointer (?NATIVE_INTERFACE_REG)
+%% - a0-a7: argument registers of the ABI, listed in ?PARAMETER_REGS
+%% - t0-t6: temporaries, caller-saved, available for JIT use
+%% - s0-s11: callee-saved (would need to be saved/restored)
+-define(AVAILABLE_REGS, [t6, t5, t4, t3, t2, t1, t0]).
+-define(PARAMETER_REGS, [a0, a1, a2, a3, a4, a5, a6, a7]).
+-define(SCRATCH_REGS, [t6, t5, t4, t2, t1, t0]).
+
+%%-----------------------------------------------------------------------------
+%% @doc Return the word size in bytes, i.e. the sizeof(term) i.e.
+%% sizeof(uintptr_t)
+%%
+%% C code equivalent is:
+%% #if UINTPTR_MAX == UINT32_MAX
+%% #define TERM_BYTES 4
+%% #elif UINTPTR_MAX == UINT64_MAX
+%% #define TERM_BYTES 8
+%% #else
+%% #error "Term size must be either 32 bit or 64 bit."
+%% #endif
+%%
+%% @end
+%% @return Word size in bytes
+%%-----------------------------------------------------------------------------
+-spec word_size() -> 4 | 8.
+word_size() -> 4.
+
+%%-----------------------------------------------------------------------------
+%% @doc Create a new backend state for provided variant, module and stream.
+%% @end
+%% @param Variant JIT variant to use (currently ?JIT_VARIANT_PIC)
+%% @param StreamModule module to stream instructions
+%% @param Stream stream state
+%% @return New backend state
+%%-----------------------------------------------------------------------------
+-spec new(any(), module(), stream()) -> state().
+new(Variant, StreamModule, Stream) ->
+    #state{
+        stream_module = StreamModule,
+        stream = Stream,
+        branches = [],
+        offset = StreamModule:offset(Stream),
+        available_regs = ?AVAILABLE_REGS,
+        used_regs = [],
+        labels = [],
+        variant = Variant
+    }.
+
+%%-----------------------------------------------------------------------------
+%% @doc Access the stream object.
+%% @end
+%% @param State current backend state
+%% @return The stream object
+%%-----------------------------------------------------------------------------
+-spec stream(state()) -> stream().
+stream(#state{stream = Stream}) ->
+    Stream.
+
+%%-----------------------------------------------------------------------------
+%% @doc Get the current offset in the stream
+%% @end
+%% @param State current backend state
+%% @return The current offset
+%%-----------------------------------------------------------------------------
+-spec offset(state()) -> non_neg_integer().
+offset(#state{stream_module = StreamModule, stream = Stream}) ->
+    StreamModule:offset(Stream).
+
+%%-----------------------------------------------------------------------------
+%% @doc Flush the stream.
+%% @end
+%% @param State current backend state
+%% @return The backend state, updated with the flushed stream
+%%-----------------------------------------------------------------------------
+-spec flush(state()) -> state().
+flush(#state{stream_module = StreamModule, stream = Stream0} = State) ->
+    Stream1 = StreamModule:flush(Stream0),
+    State#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a debugger or breakpoint instruction. This is used for debugging
+%% and not in production.
+%% @end
+%% @param State current backend state
+%% @return The updated backend state
+%%-----------------------------------------------------------------------------
+-spec debugger(state()) -> state().
+debugger(#state{stream_module = StreamModule, stream = Stream0} = State) ->
+    Stream1 = StreamModule:append(Stream0, jit_riscv32_asm:c_ebreak()),
+    State#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Return the list of currently used native registers. This is used for
+%% debugging and not in production.
+%% @end
+%% @param State current backend state
+%% @return The list of used registers
+%%-----------------------------------------------------------------------------
+-spec used_regs(state()) -> [riscv32_register()].
+used_regs(#state{used_regs = Used}) -> Used.
+
+%%-----------------------------------------------------------------------------
+%% @doc Return the list of currently available native scratch registers. This
+%% is used for debugging and not in production.
+%% @end
+%% @param State current backend state
+%% @return The list of available registers
+%%-----------------------------------------------------------------------------
+-spec available_regs(state()) -> [riscv32_register()].
+available_regs(#state{available_regs = Available}) -> Available.
+
+%%-----------------------------------------------------------------------------
+%% @doc Free native registers. The passed list of registers can contain
+%% registers, pointer to registers or other values that are ignored.
+%% @end
+%% @param State current backend state
+%% @param Regs list of registers or other values
+%% @return The updated backend state
+%%-----------------------------------------------------------------------------
+-spec free_native_registers(state(), [value()]) -> state().
+free_native_registers(State, []) ->
+    State;
+free_native_registers(State, [Reg | Rest]) ->
+    State1 = free_native_register(State, Reg),
+    free_native_registers(State1, Rest).
+
+-spec free_native_register(state(), value()) -> state().
+free_native_register(
+    #state{available_regs = Available0, used_regs = Used0} = State,
+    Reg
+) when
+    is_atom(Reg)
+->
+    {Available1, Used1} = free_reg(Available0, Used0, Reg),
+    State#state{available_regs = Available1, used_regs = Used1};
+free_native_register(State, {ptr, Reg}) ->
+    free_native_register(State, Reg);
+free_native_register(State, _Other) ->
+    State.
+
+%%-----------------------------------------------------------------------------
+%% @doc Assert that all native scratch registers are available. This is used
+%% for debugging and not in production.
+%% @end
+%% @param State current backend state
+%% @return ok
+%%-----------------------------------------------------------------------------
+-spec assert_all_native_free(state()) -> ok.
+assert_all_native_free(#state{
+    available_regs = ?AVAILABLE_REGS, used_regs = []
+}) ->
+    ok.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit the jump table at the beginning of the module. Branches will be
+%% updated afterwards with update_branches/1. Emit branches for labels from
+%% 0 (special entry for lines and labels information) to LabelsCount included
+%% (special entry for OP_INT_CALL_END).
+%%
+%% On this platform, each jump table entry is 8 bytes
+%% (?JUMP_TABLE_ENTRY_SIZE). Entries are emitted as placeholders and are
+%% rewritten by update_branches/1 into a PC-relative jump using a3:
+%% ```
+%% auipc a3, hi20(label_offset - entry_offset)
+%% jalr zero, a3, lo12(label_offset - entry_offset)
+%% ```
+%% The placeholder bytes are 16#FFFFFFFF, 16#FFFFFFFF.
+%%
+%% @end
+%% @param State current backend state
+%% @param LabelsCount number of labels in the module.
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec jump_table(state(), pos_integer()) -> state().
+jump_table(State, LabelsCount) ->
+    jump_table0(State, 0, LabelsCount).
+
+jump_table0(State, N, LabelsCount) when N > LabelsCount ->
+    State;
+jump_table0(
+    #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State,
+    N,
+    LabelsCount
+) ->
+    % Create jump table entry: AUIPC + JALR (8 bytes total)
+    % This will be patched later in update_branches/1
+    Offset = StreamModule:offset(Stream0),
+    JumpEntry = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>,
+    Stream1 = StreamModule:append(Stream0, JumpEntry),
+
+    % Record the entry offset; both instructions are patched from it
+    Reloc = {N, Offset, jump_table_auipc_jalr},
+    UpdatedState = State#state{stream = Stream1, branches = [Reloc | Branches]},
+
+    jump_table0(UpdatedState, N + 1, LabelsCount).
+
+%%-----------------------------------------------------------------------------
+%% @doc Rewrite stream to update all branches for labels.
+%% @end
+%% @param State current backend state
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec update_branches(state()) -> state().
%% Patch every pending relocation recorded in the `branches' list of the state.
%% Each entry is `{Label, Offset, Type}': the label's offset is looked up in
%% `labels', the replacement bytes are computed from the distance
%% `Rel = LabelOffset - Offset', and they are written over the bytes at `Offset'
%% with `StreamModule:replace/3'. Entries are consumed one by one until the list
%% is empty. A label that is missing from `labels' fails with `badmatch'.
%%
%% NOTE(review): several binary constructions below read `<>'. The segment
%% lists appear to have been lost from this copy of the source; they must be
%% restored from the upstream file before this can compile.
update_branches(#state{branches = []} = State) ->
    State;
update_branches(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        branches = [{Label, Offset, Type} | BranchesT],
        labels = Labels
    } = State
) ->
    {Label, LabelOffset} = lists:keyfind(Label, 1, Labels),
    % Distance from the relocation site to the label (may be negative)
    Rel = LabelOffset - Offset,
    NewInstr =
        case Type of
            {adr, Reg} when Rel rem 4 =:= 0 ->
                % Generate pc_relative_address and pad to 8 bytes with NOP
                I = pc_relative_address(Reg, Rel),
                case byte_size(I) of
                    % NOTE(review): padding expressions truncated in this copy
                    4 -> <>;
                    6 -> <>;
                    8 -> I
                end;
            {adr, Reg} when Rel rem 4 =:= 2; Rel rem 4 =:= -2 ->
                % Handle 2-byte aligned offsets and pad to 8 bytes
                % Handle both positive and negative offsets (Erlang rem can be negative)
                I = pc_relative_address(Reg, Rel),
                case byte_size(I) of
                    % NOTE(review): padding expressions truncated in this copy
                    4 -> <>;
                    6 -> <>;
                    8 -> I
                end;
            {far_branch, TempReg} ->
                % Check if branch can now be optimized to near branch
                if
                    Rel >= -1048576 andalso Rel =< 1048574 andalso (Rel rem 2) =:= 0 ->
                        % RISC-V jal has ±1MB range
                        % Optimize to near branch: jal + nops to fill original size
                        DirectBranch = jit_riscv32_asm:jal(zero, Rel),
                        case byte_size(DirectBranch) of
                            % NOTE(review): padding expressions truncated in this copy
                            2 ->
                                <>;
                            4 ->
                                <>
                        end;
                    true ->
                        % Keep far branch sequence: auipc + jalr (PC-relative, 8 bytes)
                        % Split the relative offset into upper 20 bits and lower 12 bits
                        % Adding 0x800 before the shift rounds Hi20 so that the
                        % remainder Lo12 fits a signed 12-bit immediate.
                        Hi20 = (Rel + 16#800) bsr 12,
                        Lo12 = Rel - (Hi20 bsl 12),
                        I1 = jit_riscv32_asm:auipc(TempReg, Hi20),
                        I2 = jit_riscv32_asm:jalr(zero, TempReg, Lo12),
                        % NOTE(review): presumably the concatenation of I1 and I2 — confirm
                        Entry = <>,
                        case byte_size(Entry) of
                            6 -> <>;
                            8 -> Entry
                        end
                end;
            jump_table_auipc_jalr ->
                % Calculate PC-relative offset from AUIPC instruction to target
                % AUIPC is at Offset, JALR is at Offset+4
                % Target is at LabelOffset
                % Offset from AUIPC PC to target
                PCRelOffset = LabelOffset - Offset,

                % Split into upper 20 bits and lower 12 bits
                % RISC-V encodes: target = PC + (upper20 << 12) + sign_ext(lower12)
                % If lower12 >= 0x800, it's negative when sign-extended, so add 1 to upper
                Upper20 = (PCRelOffset + 16#800) bsr 12,
                Lower12 = PCRelOffset band 16#FFF,
                % Sign-extend lower 12 bits for JALR immediate
                Lower12Signed =
                    if
                        Lower12 >= 16#800 -> Lower12 - 16#1000;
                        true -> Lower12
                    end,

                % Encode AUIPC and JALR with computed offsets
                I1 = jit_riscv32_asm:auipc(a3, Upper20),
                I2 = jit_riscv32_asm:jalr(zero, a3, Lower12Signed),
                % Map to 8 bytes
                % NOTE(review): presumably the concatenation of I1 and I2 — confirm
                JumpTableEntry = <>,
                case byte_size(JumpTableEntry) of
                    6 -> <>;
                    8 -> JumpTableEntry
                end
        end,
    % Overwrite the placeholder at Offset, then continue with the remaining entries
    Stream1 = StreamModule:replace(Stream0, Offset, NewInstr),
    update_branches(State#state{stream = Stream1, branches = BranchesT}).

%%-----------------------------------------------------------------------------
%% @doc Generate code to load a primitive function pointer into a register.
%% The pointer is read from the table addressed by `?NATIVE_INTERFACE_REG' at
%% byte offset `Primitive * 4'. Small offsets use a single `lw'; larger ones
%% first materialize the offset in `TargetReg' and add the table base.
%% @param Primitive index to the primitive to call
%% @param TargetReg register to load the function pointer into
%% @return Binary instruction sequence
%%-----------------------------------------------------------------------------
-spec load_primitive_ptr(non_neg_integer(), riscv32_register()) -> binary().
load_primitive_ptr(Primitive, TargetReg) ->
    case Primitive of
        0 ->
            jit_riscv32_asm:lw(TargetReg, ?NATIVE_INTERFACE_REG, 0);
        N when N * 4 =< 124 ->
            jit_riscv32_asm:lw(TargetReg, ?NATIVE_INTERFACE_REG, N * 4);
        N when N * 4 < 256 ->
            % Can encode N * 4 directly in li instruction
            I1 = jit_riscv32_asm:li(TargetReg, N * 4),
            I2 = jit_riscv32_asm:add(TargetReg, TargetReg, ?NATIVE_INTERFACE_REG),
            I3 = jit_riscv32_asm:lw(TargetReg, TargetReg, 0),
            % NOTE(review): presumably the concatenation of I1, I2 and I3 — confirm
            <>;
        N ->
            % For very large primitive numbers, load N and shift left by 2 (multiply by 4)
            I1 = jit_riscv32_asm:li(TargetReg, N),
            I2 = jit_riscv32_asm:slli(TargetReg, TargetReg, 2),
            I3 = jit_riscv32_asm:add(TargetReg, TargetReg, ?NATIVE_INTERFACE_REG),
            I4 = jit_riscv32_asm:lw(TargetReg, TargetReg, 0),
            % NOTE(review): presumably the concatenation of I1..I4 — confirm
            <>
    end.

%%-----------------------------------------------------------------------------
%% @doc Emit a call (call with return) to a primitive with arguments.
%% When a register is available, the primitive's function pointer is loaded
%% into it first and the call goes through that register; otherwise the
%% primitive index is handed to `call_func_ptr/3' directly. Argument
%% conversion and register saving are done by `call_func_ptr/3'.
%% @end
%% @param State current backend state
%% @param Primitive index to the primitive to call
%% @param Args arguments to pass to the primitive
%% @return Tuple of the updated backend state and the register holding the result
%%-----------------------------------------------------------------------------
-spec call_primitive(state(), non_neg_integer(), [arg()]) -> {state(), riscv32_register()}.
call_primitive(#state{available_regs = []} = State, Primitive, Args) ->
    % No register to hold the pointer: let call_func_ptr/3 resolve the primitive
    call_func_ptr(State, {primitive, Primitive}, Args);
call_primitive(#state{available_regs = [PtrReg | FreeRegs]} = State, Primitive, Args) ->
    #state{stream_module = StreamModule, stream = Stream, used_regs = InUse} = State,
    % Load the primitive's function pointer into the first available register
    LoadPtr = load_primitive_ptr(Primitive, PtrReg),
    call_func_ptr(
        State#state{
            stream = StreamModule:append(Stream, LoadPtr),
            available_regs = FreeRegs,
            used_regs = [PtrReg | InUse]
        },
        % The pointer register is released by the call itself
        {free, PtrReg},
        Args
    ).

%%-----------------------------------------------------------------------------
%% @doc Emit a jump (call without return) to a primitive with arguments. This
%% function converts arguments and pass them following the backend ABI
%% convention.
%% @end
%% @param State current backend state
%% @param Primitive index to the primitive to call
%% @param Args arguments to pass to the primitive
%% @return Updated backend state
%%-----------------------------------------------------------------------------
call_primitive_last(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State0,
    Primitive,
    Args
) ->
    % We need a register for the function pointer that should not be used as a parameter
    % Since we're not returning, we can use all scratch registers except
    % registers used for parameters
    ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)),
    ArgsRegs = args_regs(Args),
    % `--' is right-associative: without the parentheses this would evaluate as
    % ?AVAILABLE_REGS -- (ArgsRegs -- ParamRegs) and the parameter registers
    % would never be excluded from the scratch set.
    ScratchRegs = (?AVAILABLE_REGS -- ArgsRegs) -- ParamRegs,
    [Temp | AvailableRegs1] = ScratchRegs,
    UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1,
    PrepCall = load_primitive_ptr(Primitive, Temp),
    Stream1 = StreamModule:append(Stream0, PrepCall),

    State1 = State0#state{
        stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs
    },

    % Preprocess offset special arg: replace it with the current stream offset
    Args1 = lists:map(
        fun(Arg) ->
            case Arg of
                offset -> StreamModule:offset(Stream1);
                _ -> Arg
            end
        end,
        Args
    ),

    % In RISC-V, all up to 8 arguments fit in registers (a0-a7)
    % Always use tail call when calling primitives in tail position
    % The second argument must be `jit_state'; any other shape fails with
    % `case_clause'.
    State4 =
        case Args1 of
            [FirstArg, jit_state | ArgsT] ->
                % Use tail call
                ArgsForTailCall = [FirstArg, jit_state_tail_call | ArgsT],
                State2 = set_registers_args(State1, ArgsForTailCall, 0),
                tail_call_with_jit_state_registers_only(State2, Temp)
        end,
    % Nothing runs after the jump, so every register is free again
    State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}.

%%-----------------------------------------------------------------------------
%% @doc Tail call to address in register.
%% RA is preserved across regular calls (call_func_ptr saves/restores it),
%% so when the called C primitive returns, it returns to opcodesswitch.h.
%% @end
%% @param State current backend state
%% @param Reg register containing the target address
%% @return Updated backend state
%%-----------------------------------------------------------------------------
tail_call_with_jit_state_registers_only(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State,
    Reg
) ->
    % Jump to address in register (tail call)
    I1 = jit_riscv32_asm:jr(Reg),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1}.

%%-----------------------------------------------------------------------------
%% @doc Emit a return of a value if it's not equal to ctx.
%% This logic is used to break out to the scheduler, typically after signal
%% messages have been processed.
%% The emitted sequence is: `beq Reg, ctx, skip', an optional `mv a0, Reg',
%% then `ret'. The register is freed afterwards.
%% @end
%% @param State current backend state
%% @param Reg register to compare to (should be {free, Reg} as it's always freed)
%% @return Updated backend state
%%-----------------------------------------------------------------------------
return_if_not_equal_to_ctx(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0
    } = State,
    {free, Reg}
) ->
    % RISC-V doesn't have a separate cmp instruction, use beq directly
    I2 =
        case Reg of
            % Return value is already in a0
            a0 -> <<>>;
            % Move to a0 (return register)
            _ -> jit_riscv32_asm:mv(a0, Reg)
        end,
    I3 = jit_riscv32_asm:ret(),
    % Branch if equal (skip the return)
    % Offset must account for the beq instruction itself (4 bytes) plus I2 and I3
    I1 = jit_riscv32_asm:beq(Reg, ?CTX_REG, 4 + byte_size(I2) + byte_size(I3)),
    % Emission order is fixed by the skip offset above: branch, move, return
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
    {AvailableRegs1, UsedRegs1} = free_reg(
        AvailableRegs0, UsedRegs0, Reg
    ),
    State#state{
        stream = Stream1,
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1
    }.

%%-----------------------------------------------------------------------------
%% @doc Emit a jump to a label.
%% The offset of the relocation is saved and will
%% be updated with `update_branches/1`.
%% @end
%% @param State current backend state
%% @param Label to jump to
%% @return Updated backend state
%%-----------------------------------------------------------------------------
jump_to_label(#state{stream_module = StreamModule, stream = Stream, labels = Labels} = State, Label) ->
    % `false' when the label has not been emitted yet
    Known = lists:keyfind(Label, 1, Labels),
    Here = StreamModule:offset(Stream),
    {NewState, Code} = branch_to_label_code(State, Here, Label, Known),
    NewState#state{stream = StreamModule:append(Stream, Code)}.

%% Emit an unconditional jump from the current stream position to the
%% already-known stream offset `TargetOffset'.
jump_to_offset(#state{stream_module = StreamModule, stream = Stream} = State, TargetOffset) ->
    Here = StreamModule:offset(Stream),
    Code = branch_to_offset_code(State, Here, TargetOffset),
    State#state{stream = StreamModule:append(Stream, Code)}.

%%-----------------------------------------------------------------------------
%% @doc Jump to address in continuation pointer register
%% Calculate absolute address and jump to it.
%% @end
%% @param State current backend state
%% @param {free, OffsetReg} register containing the offset value
%% @return Updated backend state
%%-----------------------------------------------------------------------------
jump_to_continuation(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _],
        offset = BaseOffset
    } = State0,
    {free, OffsetReg}
) ->
    % Calculate absolute address: native_code_base + target_offset
    % where native_code_base = current_pc + (BaseOffset - CurrentStreamOffset)
    CurrentStreamOffset = StreamModule:offset(Stream0),
    NetOffset = BaseOffset - CurrentStreamOffset,

    % Get native code base address into temporary register
    I1 = pc_relative_address(Temp, NetOffset),
    % Add target offset to get final absolute address
    I2 = jit_riscv32_asm:add(Temp, Temp, OffsetReg),
    % Indirect branch to the calculated absolute address
    I3 = jit_riscv32_asm:jr(Temp),

    % Emit the three instructions in the order they were built
    Code = <<I1/binary, I2/binary, I3/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    % Free all registers since this is a tail jump
    State0#state{stream = Stream1, available_regs = ?AVAILABLE_REGS, used_regs = []}.

%% Build the instruction bytes for an unconditional jump from stream offset
%% `Offset' to stream offset `TargetOffset'.
%% Short distances use a single `j'; anything else uses `auipc' + `jalr'
%% through the first available register. The far form has no clause for an
%% empty `available_regs' list and fails with `function_clause' in that case.
branch_to_offset_code(_State, Offset, TargetOffset) when
    TargetOffset - Offset =< 2050, TargetOffset - Offset >= -2044
->
    % Near branch: use direct J instruction
    Rel = TargetOffset - Offset,
    jit_riscv32_asm:j(Rel);
branch_to_offset_code(
    #state{available_regs = [TempReg | _]}, Offset, TargetOffset
) ->
    % Far branch: use auipc + jalr sequence for PC-relative addressing
    % This computes: PC + Immediate and jumps to it

    Rel = TargetOffset - Offset,
    % Split the relative offset into upper 20 bits and lower 12 bits
    % RISC-V PC-relative addressing: target = PC + (imm20 << 12) + sign_extend(imm12)
    % Since jalr's imm12 is sign-extended, if bit 11 of Rel is set,
    % we need to add 0x800 before splitting to compensate
    Hi20 = (Rel + 16#800) bsr 12,
    Lo12Unsigned = Rel band 16#FFF,
    % Convert to signed 12-bit value: if bit 11 is set, subtract 4096
    Lo12 =
        if
            Lo12Unsigned >= 16#800 -> Lo12Unsigned - 16#1000;
            true -> Lo12Unsigned
        end,

    % TempReg = PC + (Hi20 << 12)
    I1 = jit_riscv32_asm:auipc(TempReg, Hi20),
    % Jump to TempReg + sign_extend(Lo12)
    I2 = jit_riscv32_asm:jalr(zero, TempReg, Lo12),
    <<I1/binary, I2/binary>>.

%% Build the code for a jump to `Label' emitted at stream offset `Offset'.
%% The fourth argument is the result of looking the label up in the label
%% table: `{Label, LabelOffset}' when it is already known, `false' otherwise.
%% For an unknown label an 8-byte all-ones placeholder is returned and a
%% `{Label, Offset, {far_branch, Reg}}' relocation is queued in `branches'.
%% Returns `{State, CodeBlock}'.
branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) ->
    % Label already emitted: jump straight to its offset
    {State, branch_to_offset_code(State, Offset, LabelOffset)};
branch_to_label_code(#state{available_regs = [ScratchReg | _]} = State, Offset, Label, false) ->
    #state{branches = Pending} = State,
    % RISC-V: room for a far branch sequence, auipc + jalr (8 bytes),
    % patched once the label is known
    Placeholder = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>,
    Relocation = {Label, Offset, {far_branch, ScratchReg}},
    {State#state{branches = [Relocation | Pending]}, Placeholder};
branch_to_label_code(#state{available_regs = []} = State, Offset, Label, false) ->
    #state{branches = Pending} = State,
    % RISC-V: no register available, use t6 as scratch
    % (caller-saved, safe to clobber)
    Placeholder = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>,
    Relocation = {Label, Offset, {far_branch, t6}},
    {State#state{branches = [Relocation | Pending]}, Placeholder};
branch_to_label_code(#state{available_regs = []}, _Offset, _Label, LabelLookup) ->
    error({no_available_registers, LabelLookup}).

%%-----------------------------------------------------------------------------
%% @doc Emit an if block, i.e. emit a test of a condition and conditionally
%% execute a block.
%% @end
%% @param State current backend state
%% @param Cond condition to test
%% @param BlockFn function to emit the block that may be executed
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec if_block(state(), condition() | {'and', [condition()]}, fun((state()) -> state())) -> state().
if_block(#state{stream_module = StreamModule} = State0, {'and', CondList}, BlockFn) ->
    % Emit one test per condition, remembering where each placeholder branch
    % sits and which branch instruction must eventually replace it.
    {Patches, StateAfterTests} = lists:mapfoldl(
        fun(Cond, StateAcc) ->
            TestStart = StreamModule:offset(StateAcc#state.stream),
            {NextState, BranchInfo, Delta} = if_block_cond(StateAcc, Cond),
            {{TestStart + Delta, BranchInfo}, NextState}
        end,
        State0,
        CondList
    ),
    StateAfterBlock = BlockFn(StateAfterTests),
    BlockStream = StateAfterBlock#state.stream,
    BlockEnd = StreamModule:offset(BlockStream),
    % Point every placeholder at the end of the block, last condition first
    PatchedStream = lists:foldr(
        fun({PatchOffset, {BranchFunc, Reg, Operand}}, StreamAcc) ->
            Branch = jit_riscv32_asm:BranchFunc(Reg, Operand, BlockEnd - PatchOffset),
            StreamModule:replace(StreamAcc, PatchOffset, Branch)
        end,
        BlockStream,
        Patches
    ),
    merge_used_regs(
        StateAfterBlock#state{stream = PatchedStream}, StateAfterTests#state.used_regs
    );
if_block(#state{stream_module = StreamModule, stream = Stream0} = State0, Cond, BlockFn) ->
    TestStart = StreamModule:offset(Stream0),
    {StateAfterTest, {BranchFunc, Reg, Operand}, Delta} = if_block_cond(State0, Cond),
    StateAfterBlock = BlockFn(StateAfterTest),
    BlockStream = StateAfterBlock#state.stream,
    BlockEnd = StreamModule:offset(BlockStream),
    %% Patch the conditional branch instruction to jump to the end of the block
    PatchOffset = TestStart + Delta,
    Branch = jit_riscv32_asm:BranchFunc(Reg, Operand, BlockEnd - PatchOffset),
    PatchedStream = StreamModule:replace(BlockStream, PatchOffset, Branch),
    merge_used_regs(
        StateAfterBlock#state{stream = PatchedStream}, StateAfterTest#state.used_regs
    ).

%%-----------------------------------------------------------------------------
%% @doc Emit an if else block, i.e. emit a test of a condition and
%% conditionnally execute a block or another block.
%% @end
%% @param State current backend state
%% @param Cond condition to test
%% @param BlockTrueFn function to emit the block that is executed if condition is true
%% @param BlockFalseFn function to emit the block that is executed if condition is false
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec if_else_block(state(), condition(), fun((state()) -> state()), fun((state()) -> state())) ->
    state().
if_else_block(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    Cond,
    BlockTrueFn,
    BlockFalseFn
) ->
    TestStart = StreamModule:offset(Stream0),
    {StateAfterTest, {BranchFunc, Reg, Operand}, Delta} = if_block_cond(State0, Cond),
    CondBranchOffset = TestStart + Delta,
    StateAfterTrue = BlockTrueFn(StateAfterTest),
    TrueStream = StateAfterTrue#state.stream,
    %% Reserve 2 bytes for the jump over the else block (replaced below).
    %% All-1s placeholder for flash compatibility (can only flip 1->0)
    SkipElseOffset = StreamModule:offset(TrueStream),
    StreamWithSkip = StreamModule:append(TrueStream, <<16#FFFF:16>>),
    %% The else block starts right after the reserved jump: patch the
    %% conditional branch so that it lands here.
    ElseStart = StreamModule:offset(StreamWithSkip),
    CondBranch = jit_riscv32_asm:BranchFunc(Reg, Operand, ElseStart - CondBranchOffset),
    StreamBeforeElse = StreamModule:replace(StreamWithSkip, CondBranchOffset, CondBranch),
    %% The else block starts from the register allocation that was in effect
    %% right after the test, not from the one left by the true block.
    StateAfterElse = BlockFalseFn(
        StateAfterTrue#state{
            stream = StreamBeforeElse,
            used_regs = StateAfterTest#state.used_regs,
            available_regs = StateAfterTest#state.available_regs
        }
    ),
    ElseStream = StateAfterElse#state.stream,
    EndOffset = StreamModule:offset(ElseStream),
    %% Patch the unconditional branch to jump to the end
    SkipElse = jit_riscv32_asm:j(EndOffset - SkipElseOffset),
    %% Assert that replacement is 2 bytes (c.j range: -2048..2046)
    %% If this fails, the if/else blocks are too large
    2 = byte_size(SkipElse),
    FinalStream = StreamModule:replace(ElseStream, SkipElseOffset, SkipElse),
    merge_used_regs(StateAfterElse#state{stream = FinalStream}, StateAfterTrue#state.used_regs).

-spec if_block_cond(state(), condition()) ->
    {
        state(),
        {beq | bne | blt | bge, atom(), atom() | integer()},
        non_neg_integer()
    }.
%% Emit the test for one condition of an if block.
%% Every clause appends whatever setup code the test needs, followed by a
%% 4-byte all-ones placeholder for the conditional branch, and returns
%% `{State, {BranchFunc, RegA, RegB}, Delta}' where:
%%  - `{BranchFunc, RegA, RegB}' names the `jit_riscv32_asm' branch that the
%%    caller writes over the placeholder; it tests the INVERSE of the
%%    condition so that the block is skipped when the condition is false;
%%  - `Delta' is the distance in bytes from the stream offset on entry to the
%%    placeholder.
%% Registers passed as `{free, Reg}' are released in the returned state.
%% Clause order matters: specific patterns precede the general ones.
if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) ->
    %% RISC-V: bge Reg, zero, offset (branch if Reg >= 0, i.e., NOT negative/NOT less than 0)
    BranchInstr = <<16#FFFFFFFF:32/little>>,
    Stream1 = StreamModule:append(Stream0, BranchInstr),
    State1 = State0#state{stream = Stream1},
    {State1, {bge, Reg, zero}, 0};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    {Reg, '<', Val}
) when is_atom(Reg), is_integer(Val), Val >= 0, Val =< 255 ->
    % RISC-V: bge Reg, Val, offset (branch if Reg >= Val, i.e., NOT less than)
    % Load immediate into a temp register for comparison
    [Temp | _] = State0#state.available_regs,
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Stream1 = State1#state.stream,
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    BranchInstr = <<16#FFFFFFFF:32/little>>,
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State2 = State1#state{stream = Stream2},
    {State2, {bge, Reg, Temp}, BranchDelta};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    {Reg, '<', Val}
) when is_atom(Reg), is_integer(Val) ->
    % RISC-V: bge Reg, Temp, offset (branch if Reg >= Temp, i.e., NOT less than)
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Stream1 = State1#state.stream,
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    BranchInstr = <<16#FFFFFFFF:32/little>>,
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State2 = State1#state{stream = Stream2},
    {State2, {bge, Reg, Temp}, BranchDelta};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    {RegOrTuple, '<', RegB}
) when is_atom(RegB) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    % RISC-V: bge Reg, RegB, offset (branch if Reg >= RegB, i.e., NOT less than)
    BranchInstr = <<16#FFFFFFFF:32/little>>,
    Stream1 = StreamModule:append(Stream0, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream1},
    {State2, {bge, Reg, RegB}, 0};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', 0}
) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: bne Reg, zero, offset (branch if Reg != 0, i.e., NOT equal to 0)
    BranchInstr = <<16#FFFFFFFF:32/little>>,
    Stream1 = StreamModule:append(Stream0, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream1},
    {State2, {bne, Reg, zero}, 0};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    {RegOrTuple, '==', RegB}
) when is_atom(RegB) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: bne Reg, RegB, offset (branch if Reg != RegB, i.e., NOT equal)
    BranchInstr = <<16#FFFFFFFF:32/little>>,
    Stream1 = StreamModule:append(Stream0, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream1},
    {State2, {bne, Reg, RegB}, 0};
%% Delegate (int) forms to regular forms since we only have 32-bit words
if_block_cond(State, {'(int)', RegOrTuple, '==', 0}) ->
    if_block_cond(State, {RegOrTuple, '==', 0});
if_block_cond(State, {'(int)', RegOrTuple, '==', Val}) when is_integer(Val) ->
    if_block_cond(State, {RegOrTuple, '==', Val});
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    {RegOrTuple, '!=', Val}
) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: Load immediate into temp, then beq Reg, Temp, offset
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Stream1 = State1#state.stream,
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    BranchInstr = <<16#FFFFFFFF:32/little>>,
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State2 = if_block_free_reg(RegOrTuple, State1),
    State3 = State2#state{stream = Stream2},
    {State3, {beq, Reg, Temp}, BranchDelta};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    {RegOrTuple, '!=', Val}
) when ?IS_GPR(Val) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: beq Reg, Val, offset (branch if Reg == Val, i.e., NOT not-equal)
    BranchInstr = <<16#FFFFFFFF:32/little>>,
    Stream1 = StreamModule:append(Stream0, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream1},
    {State2, {beq, Reg, Val}, 0};
if_block_cond(State, {'(int)', RegOrTuple, '!=', Val}) when is_integer(Val) ->
    if_block_cond(State, {RegOrTuple, '!=', Val});
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    {RegOrTuple, '==', Val}
) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: Load immediate into temp, then bne Reg, Temp, offset
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Stream1 = State1#state.stream,
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    BranchInstr = <<16#FFFFFFFF:32/little>>,
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State2 = if_block_free_reg(RegOrTuple, State1),
    State3 = State2#state{stream = Stream2},
    {State3, {bne, Reg, Temp}, BranchDelta};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    {{free, RegA}, '==', {free, RegB}}
) ->
    %% RISC-V: bne RegA, RegB, offset (branch if RegA != RegB, i.e., NOT equal)
    BranchInstr = <<16#FFFFFFFF:32/little>>,
    Stream1 = StreamModule:append(Stream0, BranchInstr),
    State1 = State0#state{stream = Stream1},
    State2 = if_block_free_reg({free, RegA}, State1),
    State3 = if_block_free_reg({free, RegB}, State2),
    {State3, {bne, RegA, RegB}, 0};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    {RegOrTuple, '==', Val}
) when is_integer(Val) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Stream1 = State1#state.stream,
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    %% RISC-V: bne Reg, Temp, offset (branch if Reg != Temp, i.e., NOT equal)
    BranchInstr = <<16#FFFFFFFF:32/little>>,
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State2 = if_block_free_reg(RegOrTuple, State1),
    State3 = State2#state{stream = Stream2},
    {State3, {bne, Reg, Temp}, BranchDelta};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    {RegOrTuple, '!=', Val}
) when is_integer(Val) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Stream1 = State1#state.stream,
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    %% RISC-V: beq Reg, Temp, offset (branch if Reg == Temp, i.e., NOT not-equal)
    BranchInstr = <<16#FFFFFFFF:32/little>>,
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State2 = if_block_free_reg(RegOrTuple, State1),
    State3 = State2#state{stream = Stream2},
    {State3, {beq, Reg, Temp}, BranchDelta};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _]
    } = State0,
    {'(bool)', RegOrTuple, '==', false}
) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: Test bit 0 by shifting to MSB, then branch if negative (bit was 1, NOT false)
    I1 = jit_riscv32_asm:slli(Temp, Reg, 31),
    Stream1 = StreamModule:append(Stream0, I1),
    BranchInstr = <<16#FFFFFFFF:32/little>>,
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream2},
    {State2, {blt, Temp, zero}, byte_size(I1)};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _]
    } = State0,
    {'(bool)', RegOrTuple, '!=', false}
) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: Test bit 0 by shifting to MSB, then branch if non-negative (bit was 0, NOT true)
    I1 = jit_riscv32_asm:slli(Temp, Reg, 31),
    Stream1 = StreamModule:append(Stream0, I1),
    BranchInstr = <<16#FFFFFFFF:32/little>>,
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream2},
    {State2, {bge, Temp, zero}, byte_size(I1)};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _]
    } = State0,
    {RegOrTuple, '&', Val, '!=', 0}
) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: Test bits using ANDI or li+and
    TestCode =
        if
            Val >= -2048 andalso Val =< 2047 ->
                %% Can use ANDI instruction directly
                jit_riscv32_asm:andi(Temp, Reg, Val);
            true ->
                %% Need to load immediate into temp register first
                TestCode0 = jit_riscv32_asm:li(Temp, Val),
                TestCode1 = jit_riscv32_asm:and_(Temp, Reg, Temp),
                <<TestCode0/binary, TestCode1/binary>>
        end,
    OffsetBefore = StreamModule:offset(Stream0),
    Stream1 = StreamModule:append(Stream0, TestCode),
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    %% Branch if result is zero (no bits set, NOT != 0)
    BranchInstr = <<16#FFFFFFFF:32/little>>,
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream2},
    {State2, {beq, Temp, zero}, BranchDelta};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _]
    } = State0,
    {Reg, '&', 16#F, '!=', 16#F}
) when ?IS_GPR(Reg) ->
    %% RISC-V: Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG
    %% Inverting and shifting left by 28 leaves zero exactly when the low
    %% four bits of Reg were all set.
    I1 = jit_riscv32_asm:not_(Temp, Reg),
    I2 = jit_riscv32_asm:slli(Temp, Temp, 28),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    BranchInstr = <<16#FFFFFFFF:32/little>>,
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State1 = State0#state{stream = Stream2},
    {State1, {beq, Temp, zero}, byte_size(I1) + byte_size(I2)};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State0,
    {{free, Reg} = RegTuple, '&', 16#F, '!=', 16#F}
) when ?IS_GPR(Reg) ->
    %% RISC-V: Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG
    %% Reg is being freed, so it can be overwritten in place.
    I1 = jit_riscv32_asm:not_(Reg, Reg),
    I2 = jit_riscv32_asm:slli(Reg, Reg, 28),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    BranchInstr = <<16#FFFFFFFF:32/little>>,
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State1 = State0#state{stream = Stream2},
    State2 = if_block_free_reg(RegTuple, State1),
    {State2, {beq, Reg, zero}, byte_size(I1) + byte_size(I2)};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | AT]
    } = State0,
    {Reg, '&', Mask, '!=', Val}
) when ?IS_GPR(Reg) ->
    %% RISC-V: AND with mask, then compare with value
    OffsetBefore = StreamModule:offset(Stream0),
    % Reg must be preserved: work on a copy in Temp
    I1 = jit_riscv32_asm:mv(Temp, Reg),
    Stream1 = StreamModule:append(Stream0, I1),
    State1 = State0#state{stream = Stream1},
    {State2, Temp} = and_(State1#state{available_regs = AT}, {free, Temp}, Mask),
    Stream2 = State2#state.stream,
    %% Compare Temp with Val and branch if equal (NOT != Val)
    case Val of
        0 ->
            %% Optimize comparison with zero
            BranchDelta = StreamModule:offset(Stream2) - OffsetBefore,
            BranchInstr = <<16#FFFFFFFF:32/little>>,
            Stream3 = StreamModule:append(Stream2, BranchInstr),
            State3 = State2#state{
                stream = Stream3, available_regs = [Temp | State2#state.available_regs]
            },
            {State3, {beq, Temp, zero}, BranchDelta};
        _ when ?IS_GPR(Val) ->
            %% Val is a register
            BranchDelta = StreamModule:offset(Stream2) - OffsetBefore,
            BranchInstr = <<16#FFFFFFFF:32/little>>,
            Stream3 = StreamModule:append(Stream2, BranchInstr),
            State3 = State2#state{
                stream = Stream3, available_regs = [Temp | State2#state.available_regs]
            },
            {State3, {beq, Temp, Val}, BranchDelta};
        _ ->
            %% Val is an immediate - need second temp register
            %% Reuse the mask register for the comparison value
            [MaskReg | AT2] = AT,
            State3 = mov_immediate(State2#state{available_regs = AT2}, MaskReg, Val),
            Stream3 = State3#state.stream,
            BranchDelta = StreamModule:offset(Stream3) - OffsetBefore,
            BranchInstr = <<16#FFFFFFFF:32/little>>,
            Stream4 = StreamModule:append(Stream3, BranchInstr),
            State4 = State3#state{
                stream = Stream4, available_regs = [Temp, MaskReg | State3#state.available_regs]
            },
            {State4, {beq, Temp, MaskReg}, BranchDelta}
    end;
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailRegs
    } = State0,
    {{free, Reg} = RegTuple, '&', Mask, '!=', Val}
) when ?IS_GPR(Reg) ->
    %% RISC-V: AND with mask, then compare with value
    OffsetBefore = StreamModule:offset(Stream0),
    {State1, Reg} = and_(State0, RegTuple, Mask),
    Stream1 = State1#state.stream,
    %% Compare Reg with Val and branch if equal (NOT != Val)
    case Val of
        0 ->
            %% Optimize comparison with zero
            BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
            BranchInstr = <<16#FFFFFFFF:32/little>>,
            Stream2 = StreamModule:append(Stream1, BranchInstr),
            State2 = State1#state{stream = Stream2},
            State3 = if_block_free_reg(RegTuple, State2),
            {State3, {beq, Reg, zero}, BranchDelta};
        _ when ?IS_GPR(Val) ->
            %% Val is a register
            BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
            BranchInstr = <<16#FFFFFFFF:32/little>>,
            Stream2 = StreamModule:append(Stream1, BranchInstr),
            State2 = State1#state{stream = Stream2},
            State3 = if_block_free_reg(RegTuple, State2),
            {State3, {beq, Reg, Val}, BranchDelta};
        _ ->
            %% Val is an immediate - need temp register
            %% Reuse the mask register for the comparison value
            [MaskReg | AT] = State1#state.available_regs,
            State2 = mov_immediate(State1#state{available_regs = AT}, MaskReg, Val),
            Stream2 = State2#state.stream,
            BranchDelta = StreamModule:offset(Stream2) - OffsetBefore,
            BranchInstr = <<16#FFFFFFFF:32/little>>,
            Stream3 = StreamModule:append(Stream2, BranchInstr),
            % Restore the available list seen on entry before freeing Reg
            State3 = State2#state{stream = Stream3, available_regs = AvailRegs},
            State4 = if_block_free_reg(RegTuple, State3),
            {State4, {beq, Reg, MaskReg}, BranchDelta}
    end.

%% Release the register of a `{free, Reg}' operand; a bare register operand
%% leaves the state untouched.
-spec if_block_free_reg(riscv32_register() | {free, riscv32_register()}, state()) -> state().
if_block_free_reg({free, Reg}, State0) ->
    #state{available_regs = AvR0, used_regs = UR0} = State0,
    {AvR1, UR1} = free_reg(AvR0, UR0, Reg),
    State0#state{
        available_regs = AvR1,
        used_regs = UR1
    };
if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) ->
    State0.

%% Mark every register of the given list as used in the state: registers not
%% yet in `used_regs' are added to it and removed from `available_regs'.
-spec merge_used_regs(state(), [riscv32_register()]) -> state().
merge_used_regs(#state{used_regs = UR0, available_regs = AvR0} = State, [
    Reg | T
]) ->
    case lists:member(Reg, UR0) of
        true ->
            merge_used_regs(State, T);
        false ->
            AvR1 = lists:delete(Reg, AvR0),
            UR1 = [Reg | UR0],
            merge_used_regs(
                State#state{used_regs = UR1, available_regs = AvR1}, T
            )
    end;
merge_used_regs(State, []) ->
    State.
+ +%%----------------------------------------------------------------------------- +%% @doc Emit a shift register right by a fixed number of bits, effectively +%% dividing it by 2^Shift +%% @param State current state +%% @param Reg register to shift +%% @param Shift number of bits to shift +%% @return new state +%%----------------------------------------------------------------------------- +-spec shift_right(#state{}, maybe_free_riscv32_register(), non_neg_integer()) -> + {#state{}, riscv32_register()}. +shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Shift) when + ?IS_GPR(Reg) andalso is_integer(Shift) +-> + I = jit_riscv32_asm:srli(Reg, Reg, Shift), + Stream1 = StreamModule:append(Stream0, I), + {State#state{stream = Stream1}, Reg}; +shift_right( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [ResultReg | T], + used_regs = UR + } = State, + Reg, + Shift +) when + ?IS_GPR(Reg) andalso is_integer(Shift) +-> + I = jit_riscv32_asm:srli(ResultReg, Reg, Shift), + Stream1 = StreamModule:append(Stream0, I), + {State#state{stream = Stream1, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a shift register left by a fixed number of bits, effectively +%% multiplying it by 2^Shift +%% @param State current state +%% @param Reg register to shift +%% @param Shift number of bits to shift +%% @return new state +%%----------------------------------------------------------------------------- +shift_left(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when + is_atom(Reg) +-> + I = jit_riscv32_asm:slli(Reg, Reg, Shift), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a call to a function pointer with arguments. 
This function converts +%% arguments and passes them following the backend ABI convention. +%% @end +%% @param State current backend state +%% @param FuncPtrTuple either {free, Reg} or {primitive, PrimitiveIndex} +%% @param Args arguments to pass to the function +%% @return Updated backend state and return register +%%----------------------------------------------------------------------------- +-spec call_func_ptr(state(), {free, riscv32_register()} | {primitive, non_neg_integer()}, [arg()]) -> + {state(), riscv32_register()}. +call_func_ptr( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailableRegs0, + used_regs = UsedRegs0 + } = State0, + FuncPtrTuple, + Args +) -> + FreeRegs = lists:flatmap( + fun + ({free, {ptr, Reg}}) -> [Reg]; + ({free, Reg}) when is_atom(Reg) -> [Reg]; + (_) -> [] + end, + [FuncPtrTuple | Args] + ), + UsedRegs1 = UsedRegs0 -- FreeRegs, + % Save RA (like AArch64 saves LR) so it's preserved across jalr calls + SavedRegs = [?RA_REG, ?CTX_REG, ?JITSTATE_REG, ?NATIVE_INTERFACE_REG | UsedRegs1], + + % Calculate available registers + FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS), + AvailableRegs1 = FreeGPRegs ++ AvailableRegs0, + + % Calculate stack space: round up to 16-byte boundary for RISC-V ABI + NumRegs = length(SavedRegs), + StackBytes = NumRegs * 4, + AlignedStackBytes = ((StackBytes + 15) div 16) * 16, + + Stream1 = push_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream0), + + % Set up arguments following RISC-V ILP32 calling convention + % Arguments are passed in a0-a7 (up to 8 register arguments) + Args1 = lists:map( + fun(Arg) -> + case Arg of + offset -> StreamModule:offset(Stream1); + _ -> Arg + end + end, + Args + ), + + RegArgs0 = Args1, + RegArgsRegs = lists:flatmap(fun arg_to_reg_list/1, RegArgs0), + + % We pushed registers to stack, so we can use these registers we saved + % and the currently available registers + SetArgsRegsOnlyAvailableArgs = (UsedRegs1 -- RegArgsRegs) 
++ AvailableRegs0, + State1 = State0#state{ + available_regs = SetArgsRegsOnlyAvailableArgs, + used_regs = ?AVAILABLE_REGS -- SetArgsRegsOnlyAvailableArgs, + stream = Stream1 + }, + + ParameterRegs = parameter_regs(RegArgs0), + {Stream3, SetArgsAvailableRegs, FuncPtrReg, RegArgs} = + case FuncPtrTuple of + {free, FuncPtrReg0} -> + % If FuncPtrReg is in parameter regs, we must swap it with a free reg. + case lists:member(FuncPtrReg0, ParameterRegs) of + true -> + case SetArgsRegsOnlyAvailableArgs -- ParameterRegs of + [] -> + % Swap SetArgsRegsOnlyAvailableArgs with a reg used in RegArgs0 + % that is not in ParameterRegs + [NewArgReg | _] = SetArgsRegsOnlyAvailableArgs, + [FuncPtrReg1 | _] = RegArgsRegs -- ParameterRegs, + MovInstr1 = jit_riscv32_asm:mv(NewArgReg, FuncPtrReg1), + MovInstr2 = jit_riscv32_asm:mv(FuncPtrReg1, FuncPtrReg0), + SetArgsAvailableArgs1 = + (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++ + [FuncPtrReg0], + RegArgs1 = replace_reg(RegArgs0, FuncPtrReg1, NewArgReg), + { + StreamModule:append( + State1#state.stream, <> + ), + SetArgsAvailableArgs1, + FuncPtrReg1, + RegArgs1 + }; + [FuncPtrReg1 | _] -> + MovInstr = jit_riscv32_asm:mv(FuncPtrReg1, FuncPtrReg0), + SetArgsAvailableArgs1 = + (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++ + [FuncPtrReg0], + { + StreamModule:append(State1#state.stream, MovInstr), + SetArgsAvailableArgs1, + FuncPtrReg1, + RegArgs0 + } + end; + false -> + SetArgsAvailableArgs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0], + {State1#state.stream, SetArgsAvailableArgs1, FuncPtrReg0, RegArgs0} + end; + {primitive, Primitive} -> + [FuncPtrReg0 | _] = SetArgsRegsOnlyAvailableArgs -- ParameterRegs, + SetArgsAvailableRegs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0], + PrepCall = load_primitive_ptr(Primitive, FuncPtrReg0), + Stream2 = StreamModule:append(State1#state.stream, PrepCall), + {Stream2, SetArgsAvailableRegs1, FuncPtrReg0, RegArgs0} + end, + + State3 = State1#state{ + available_regs = 
SetArgsAvailableRegs, + used_regs = ?AVAILABLE_REGS -- SetArgsAvailableRegs, + stream = Stream3 + }, + + StackOffset = AlignedStackBytes, + State4 = set_registers_args(State3, RegArgs, ParameterRegs, StackOffset), + Stream4 = State4#state.stream, + + % Call the function pointer (using JALR for call with return) + Call = jit_riscv32_asm:jalr(ra, FuncPtrReg, 0), + Stream5 = StreamModule:append(Stream4, Call), + + % For result, we need a free register (including FuncPtrReg). + % If none are available (all registers were pushed to the stack), + % we write the result to the stack position of FuncPtrReg + {Stream6, UsedRegs2} = + case length(SavedRegs) of + N when N >= 7 andalso element(1, FuncPtrTuple) =:= free -> + % We use original FuncPtrReg then as we know it's available. + % Calculate stack offset: find register index in SavedRegs * 4 bytes + ResultReg = element(2, FuncPtrTuple), + RegIndex = index_of(ResultReg, SavedRegs), + case RegIndex >= 0 of + true -> + StoreResultStackOffset = RegIndex * 4, + StoreResult = jit_riscv32_asm:sw(sp, a0, StoreResultStackOffset), + {StreamModule:append(Stream5, StoreResult), [ResultReg | UsedRegs1]}; + false -> + % FuncPtrReg was not in SavedRegs, use an available register + [ResultReg1 | _] = AvailableRegs1 -- SavedRegs, + MoveResult = jit_riscv32_asm:mv(ResultReg1, a0), + {StreamModule:append(Stream5, MoveResult), [ResultReg1 | UsedRegs1]} + end; + _ -> + % Use any free that is not in SavedRegs + [ResultReg | _] = AvailableRegs1 -- SavedRegs, + MoveResult = jit_riscv32_asm:mv(ResultReg, a0), + {StreamModule:append(Stream5, MoveResult), [ResultReg | UsedRegs1]} + end, + + Stream8 = pop_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream6), + + AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1), + AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2), + { + State4#state{ + stream = Stream8, + available_regs = AvailableRegs3, + used_regs = UsedRegs2 + }, + ResultReg + }. 
+ +arg_to_reg_list({free, {ptr, Reg}}) -> [Reg]; +arg_to_reg_list({free, Reg}) when is_atom(Reg) -> [Reg]; +arg_to_reg_list(Reg) when is_atom(Reg) -> [Reg]; +arg_to_reg_list(_) -> []. + +index_of(Item, List) -> index_of(Item, List, 0). + +index_of(_, [], _) -> -1; +index_of(Item, [Item | _], Index) -> Index; +index_of(Item, [_ | Rest], Index) -> index_of(Item, Rest, Index + 1). + +push_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream0) when length(SavedRegs) > 0 -> + % RISC-V: addi sp, sp, -AlignedStackBytes then sw reg, offset(sp) for each reg + StackAdjust = jit_riscv32_asm:addi(sp, sp, -AlignedStackBytes), + Stream1 = StreamModule:append(Stream0, StackAdjust), + {Stream2, _} = lists:foldl( + fun(Reg, {StreamAcc, Offset}) -> + Store = jit_riscv32_asm:sw(sp, Reg, Offset), + {StreamModule:append(StreamAcc, Store), Offset + 4} + end, + {Stream1, 0}, + SavedRegs + ), + Stream2; +push_registers([], _AlignedStackBytes, _StreamModule, Stream0) -> + Stream0. + +pop_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream0) when length(SavedRegs) > 0 -> + % RISC-V: lw reg, offset(sp) for each reg then addi sp, sp, AlignedStackBytes + {Stream1, _} = lists:foldl( + fun(Reg, {StreamAcc, Offset}) -> + Load = jit_riscv32_asm:lw(Reg, sp, Offset), + {StreamModule:append(StreamAcc, Load), Offset + 4} + end, + {Stream0, 0}, + SavedRegs + ), + StackAdjust = jit_riscv32_asm:addi(sp, sp, AlignedStackBytes), + StreamModule:append(Stream1, StackAdjust); +pop_registers([], _AlignedStackBytes, _StreamModule, Stream0) -> + Stream0. + +set_registers_args(State0, Args, StackOffset) -> + ParamRegs = parameter_regs(Args), + set_registers_args(State0, Args, ParamRegs, StackOffset). 
+ +set_registers_args( + #state{used_regs = UsedRegs} = State0, + Args, + ParamRegs, + StackOffset +) -> + ArgsRegs = args_regs(Args), + AvailableScratchGP = ((?SCRATCH_REGS -- ParamRegs) -- ArgsRegs) -- UsedRegs, + State1 = set_registers_args0( + State0, Args, ArgsRegs, ParamRegs, AvailableScratchGP, StackOffset + ), + Stream1 = State1#state.stream, + NewUsedRegs = lists:foldl( + fun + ({free, {ptr, Reg}}, AccUsed) -> lists:delete(Reg, AccUsed); + ({free, Reg}, AccUsed) -> lists:delete(Reg, AccUsed); + (_, AccUsed) -> AccUsed + end, + UsedRegs, + Args + ), + State1#state{ + stream = Stream1, + available_regs = ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs, + used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs) + }. + +parameter_regs(Args) -> + parameter_regs0(Args, ?PARAMETER_REGS, []). + +% ILP32: 64-bit arguments require double-word alignment (even register number) +parameter_regs0([], _, Acc) -> + lists:reverse(Acc); +parameter_regs0([{avm_int64_t, _} | T], [a0, a1 | Rest], Acc) -> + parameter_regs0(T, Rest, [a1, a0 | Acc]); +parameter_regs0([{avm_int64_t, _} | T], [a1, a2 | Rest], Acc) -> + parameter_regs0(T, Rest, [a2, a1 | Acc]); +parameter_regs0([{avm_int64_t, _} | T], [a2, a3 | Rest], Acc) -> + parameter_regs0(T, Rest, [a3, a2 | Acc]); +parameter_regs0([_Other | T], [Reg | Rest], Acc) -> + parameter_regs0(T, Rest, [Reg | Acc]). + +replace_reg(Args, Reg1, Reg2) -> + replace_reg0(Args, Reg1, Reg2, []). + +replace_reg0([Reg | T], Reg, Replacement, Acc) -> + lists:reverse(Acc, [Replacement | T]); +replace_reg0([{free, Reg} | T], Reg, Replacement, Acc) -> + lists:reverse(Acc, [Replacement | T]); +replace_reg0([Other | T], Reg, Replacement, Acc) -> + replace_reg0(T, Reg, Replacement, [Other | Acc]). 
+ +set_registers_args0(State, [], [], [], _AvailGP, _StackOffset) -> + State; +set_registers_args0(State, [{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, StackOffset) -> + set_registers_args0(State, [FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP, StackOffset); +set_registers_args0( + State, [ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset +) -> + set_registers_args0(State, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); +% Handle 64-bit arguments that need two registers according to ILP32 +set_registers_args0( + State, + [{avm_int64_t, Value} | ArgsT], + ArgsRegs, + ParamRegs, + AvailGP, + StackOffset +) when is_integer(Value) -> + LowPart = Value band 16#FFFFFFFF, + HighPart = (Value bsr 32) band 16#FFFFFFFF, + set_registers_args0( + State, [LowPart, HighPart | ArgsT], [imm | ArgsRegs], ParamRegs, AvailGP, StackOffset + ); +% ctx is special as we need it to access x_reg/y_reg/fp_reg and we don't +% want to replace it +set_registers_args0( + State, [Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset +) -> + false = lists:member(?CTX_REG, ArgsRegs), + State1 = set_registers_args1(State, Arg, ?CTX_REG, StackOffset), + set_registers_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); +set_registers_args0( + #state{stream_module = StreamModule} = State0, + [Arg | ArgsT], + [_ArgReg | ArgsRegsT], + [ParamReg | ParamRegsT], + AvailGP, + StackOffset +) -> + case lists:member(ParamReg, ArgsRegsT) of + false -> + State1 = set_registers_args1(State0, Arg, ParamReg, StackOffset), + set_registers_args0(State1, ArgsT, ArgsRegsT, ParamRegsT, AvailGP, StackOffset); + true -> + [Avail | AvailGPT] = AvailGP, + I = jit_riscv32_asm:mv(Avail, ParamReg), + Stream1 = StreamModule:append(State0#state.stream, I), + State1 = set_registers_args1( + State0#state{stream = Stream1}, Arg, ParamReg, StackOffset + ), + NewArgsT = replace_reg(ArgsT, ParamReg, Avail), + set_registers_args0( + State1, NewArgsT, 
ArgsRegsT, ParamRegsT, AvailGPT, StackOffset + ) + end. + +set_registers_args1(State, Reg, Reg, _Offset) -> + State; +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, + jit_state, + ParamReg, + _StackOffset +) -> + % jit_state is always in a1, so we only need to move it if the param reg is different + case ParamReg of + a1 -> + State; + _ -> + I = jit_riscv32_asm:mv(ParamReg, a1), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1} + end; +% For tail calls, jit_state is already in a1 +set_registers_args1(State, jit_state_tail_call, a1, _StackOffset) -> + State; +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, + {x_reg, extra}, + Reg, + _StackOffset +) -> + {BaseReg, Off} = ?X_REG(?MAX_REG), + I = jit_riscv32_asm:lw(Reg, BaseReg, Off), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, Reg, _StackOffset +) -> + {XReg, X_REGOffset} = ?X_REG(X), + I = jit_riscv32_asm:lw(Reg, XReg, X_REGOffset), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Source}, Reg, _StackOffset +) -> + I = jit_riscv32_asm:lw(Reg, Source, 0), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0, available_regs = AvailRegs} = State, + {y_reg, X}, + Reg, + _StackOffset +) -> + Code = ldr_y_reg(Reg, X, AvailRegs), + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, ArgReg, Reg, _StackOffset +) when + ?IS_GPR(ArgReg) +-> + I = jit_riscv32_asm:mv(Reg, ArgReg), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; 
+set_registers_args1(State, Value, Reg, _StackOffset) when ?IS_SIGNED_OR_UNSIGNED_INT32_T(Value) -> + mov_immediate(State, Reg, Value). + +%%----------------------------------------------------------------------------- +%% @doc Emit a move to a vm register (x_reg, y_reg, fpreg or a pointer on x_reg) +%% from an immediate, a native register or another vm register. +%% @end +%% @param State current backend state +%% @param Src value to move to vm register +%% @param Dest vm register to move to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec move_to_vm_register(state(), Src :: value() | vm_register(), Dest :: vm_register()) -> + state(). +% Native register to VM register +move_to_vm_register(State0, Src, {x_reg, extra}) when is_atom(Src) -> + {BaseReg, Off} = ?X_REG(?MAX_REG), + I1 = jit_riscv32_asm:sw(BaseReg, Src, Off), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State0#state{stream = Stream1}; +move_to_vm_register(State0, Src, {x_reg, X}) when is_atom(Src) -> + {BaseReg, Off} = ?X_REG(X), + I1 = jit_riscv32_asm:sw(BaseReg, Src, Off), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State0#state{stream = Stream1}; +move_to_vm_register(State0, Src, {ptr, Reg}) when is_atom(Src) -> + I1 = jit_riscv32_asm:sw(Reg, Src, 0), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State0#state{stream = Stream1}; +move_to_vm_register(#state{available_regs = [Temp1 | AT]} = State0, Src, {y_reg, Y}) when + is_atom(Src) +-> + Code = str_y_reg(Src, Y, Temp1, AT), + Stream1 = (State0#state.stream_module):append(State0#state.stream, Code), + State0#state{stream = Stream1}; +% Source is an integer to y_reg (optimized: ldr first, then movs) +move_to_vm_register(#state{available_regs = [Temp1, Temp2 | AT]} = State0, N, {y_reg, Y}) when + is_integer(N), N >= 0, N =< 255 +-> + I1 = jit_riscv32_asm:li(Temp2, N), + YCode = 
str_y_reg(Temp2, Y, Temp1, AT), + Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), + State0#state{stream = Stream1}; +% Source is an integer (0-255 for movs, negative values need different handling) +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when + is_integer(N), N >= 0, N =< 255 +-> + I1 = jit_riscv32_asm:li(Temp, N), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +%% Handle large values using simple literal pool (branch-over pattern) +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when + is_integer(N) +-> + State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N), + State2 = move_to_vm_register(State1, Temp, Dest), + State2#state{available_regs = AR0}; +% Source is a VM register +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, extra}, Dest) -> + {BaseReg, Off} = ?X_REG(?MAX_REG), + I1 = jit_riscv32_asm:lw(Temp, BaseReg, Off), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, X}, Dest) -> + {XReg, X_REGOffset} = ?X_REG(X), + I1 = jit_riscv32_asm:lw(Temp, XReg, X_REGOffset), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {ptr, Reg}, Dest) -> + I1 = jit_riscv32_asm:lw(Temp, Reg, 0), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = 
move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Y}, Dest) -> + Code = ldr_y_reg(Temp, Y, AT), + Stream1 = (State0#state.stream_module):append(State0#state.stream, Code), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +% term_to_float +move_to_vm_register( + #state{ + stream_module = StreamModule, + available_regs = [Temp1, Temp2 | _], + stream = Stream0, + variant = Variant + } = + State0, + {free, {ptr, Reg, 1}}, + {fp_reg, F} +) -> + {BaseReg, Off} = ?FP_REGS, + I1 = jit_riscv32_asm:lw(Temp1, BaseReg, Off), + I2 = jit_riscv32_asm:lw(Temp2, Reg, 4), + case Variant band ?JIT_VARIANT_FLOAT32 of + 0 -> + % Double precision: write both 32-bit parts + I3 = jit_riscv32_asm:sw(Temp1, Temp2, F * 8), + I4 = jit_riscv32_asm:lw(Temp2, Reg, 8), + I5 = jit_riscv32_asm:sw(Temp1, Temp2, F * 8 + 4), + Code = <>; + _ -> + % Single precision: write only first 32-bit part + I3 = jit_riscv32_asm:sw(Temp1, Temp2, F * 4), + Code = <> + end, + Stream1 = StreamModule:append(Stream0, Code), + State1 = free_native_register(State0, Reg), + State1#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a move of an array element (reg[x]) to a vm or a native register. +%% @end +%% @param State current backend state +%% @param Reg base register of the array +%% @param Index index in the array, as an integer or a native register +%% @param Dest vm or native register to move to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec move_array_element( + state(), + riscv32_register(), + non_neg_integer() | riscv32_register(), + vm_register() | riscv32_register() +) -> state(). 
+move_array_element( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + Reg, + Index, + {x_reg, X} +) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) -> + I1 = jit_riscv32_asm:lw(Temp, Reg, Index * 4), + {BaseReg, Off} = ?X_REG(X), + I2 = jit_riscv32_asm:sw(BaseReg, Temp, Off), + Stream1 = StreamModule:append(Stream0, <>), + State#state{stream = Stream1}; +move_array_element( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + Reg, + Index, + {ptr, Dest} +) when is_atom(Reg) andalso is_integer(Index) -> + I1 = jit_riscv32_asm:lw(Temp, Reg, Index * 4), + I2 = jit_riscv32_asm:sw(Dest, Temp, 0), + Stream1 = StreamModule:append(Stream0, <>), + State#state{stream = Stream1}; +move_array_element( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | AT]} = + State, + Reg, + Index, + {y_reg, Y} +) when is_atom(Reg) andalso is_integer(Index) -> + I1 = jit_riscv32_asm:lw(Temp2, Reg, Index * 4), + YCode = str_y_reg(Temp2, Y, Temp1, AT), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; +move_array_element( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | AT]} = + State, + {free, Reg}, + Index, + {y_reg, Y} +) when is_integer(Index) -> + I1 = jit_riscv32_asm:lw(Reg, Reg, Index * 4), + YCode = str_y_reg(Reg, Y, Temp, AT), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; +move_array_element( + #state{stream_module = StreamModule, stream = Stream0} = State, Reg, Index, Dest +) when is_atom(Dest) andalso is_integer(Index) -> + I1 = jit_riscv32_asm:lw(Dest, Reg, Index * 4), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; +move_array_element( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailableRegs0, + used_regs = UsedRegs0 + } = State, + Reg, + {free, 
IndexReg}, + {x_reg, X} +) when X < ?MAX_REG andalso is_atom(IndexReg) -> + I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2), + I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg), + I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0), + {BaseReg, Off} = ?X_REG(X), + I4 = jit_riscv32_asm:sw(BaseReg, IndexReg, Off), + {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg), + Stream1 = StreamModule:append(Stream0, <>), + State#state{ + available_regs = AvailableRegs1, + used_regs = UsedRegs1, + stream = Stream1 + }; +move_array_element( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailableRegs0, + used_regs = UsedRegs0 + } = State, + Reg, + {free, IndexReg}, + {ptr, PtrReg} +) when is_atom(IndexReg) -> + I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2), + I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg), + I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0), + I4 = jit_riscv32_asm:sw(PtrReg, IndexReg, 0), + {AvailableRegs1, UsedRegs1} = free_reg( + AvailableRegs0, UsedRegs0, IndexReg + ), + Stream1 = StreamModule:append(Stream0, <>), + State#state{ + available_regs = AvailableRegs1, + used_regs = UsedRegs1, + stream = Stream1 + }; +move_array_element( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | AT] = AvailableRegs0, + used_regs = UsedRegs0 + } = State, + Reg, + {free, IndexReg}, + {y_reg, Y} +) when is_atom(IndexReg) -> + I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2), + I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg), + I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0), + Code = str_y_reg(IndexReg, Y, Temp, AT), + I4 = Code, + {AvailableRegs1, UsedRegs1} = free_reg( + AvailableRegs0, UsedRegs0, IndexReg + ), + Stream1 = StreamModule:append( + Stream0, <> + ), + State#state{ + available_regs = AvailableRegs1, + used_regs = UsedRegs1, + stream = Stream1 + }. 
+ +%% @doc move reg[x] to a vm or native register +-spec get_array_element( + state(), riscv32_register() | {free, riscv32_register()}, non_neg_integer() +) -> + {state(), riscv32_register()}. +get_array_element( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State, + {free, Reg}, + Index +) -> + I1 = jit_riscv32_asm:lw(Reg, Reg, Index * 4), + Stream1 = StreamModule:append(Stream0, <>), + {State#state{stream = Stream1}, Reg}; +get_array_element( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [ElemReg | AvailableT], + used_regs = UsedRegs0 + } = State, + Reg, + Index +) -> + I1 = jit_riscv32_asm:lw(ElemReg, Reg, Index * 4), + Stream1 = StreamModule:append(Stream0, <>), + { + State#state{ + stream = Stream1, available_regs = AvailableT, used_regs = [ElemReg | UsedRegs0] + }, + ElemReg + }. + +%% @doc move an integer, a vm or native register to reg[x] +-spec move_to_array_element( + state(), integer() | vm_register() | riscv32_register(), riscv32_register(), non_neg_integer() +) -> state(). 
+move_to_array_element( + #state{stream_module = StreamModule, stream = Stream0} = State0, + ValueReg, + Reg, + Index +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) -> + I1 = jit_riscv32_asm:sw(Reg, ValueReg, Index * 4), + Stream1 = StreamModule:append(Stream0, I1), + State0#state{stream = Stream1}; +move_to_array_element( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + ValueReg, + Reg, + IndexReg +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) -> + I1 = jit_riscv32_asm:mv(Temp, IndexReg), + I2 = jit_riscv32_asm:slli(Temp, Temp, 2), + I3 = jit_riscv32_asm:add(Temp, Reg, Temp), + I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0), + Stream1 = StreamModule:append(Stream0, <>), + State0#state{stream = Stream1}; +move_to_array_element( + State0, + Value, + Reg, + Index +) -> + {State1, Temp} = copy_to_native_register(State0, Value), + State2 = move_to_array_element(State1, Temp, Reg, Index), + free_native_register(State2, Temp). 
+ +move_to_array_element( + State, + Value, + BaseReg, + IndexReg, + Offset +) when is_integer(IndexReg) andalso is_integer(Offset) andalso Offset div 8 =:= 0 -> + move_to_array_element(State, Value, BaseReg, IndexReg + (Offset div 8)); +move_to_array_element( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + ValueReg, + BaseReg, + IndexReg, + Offset +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> + I1 = jit_riscv32_asm:addi(Temp, IndexReg, Offset), + I2 = jit_riscv32_asm:slli(Temp, Temp, 2), + I3 = jit_riscv32_asm:add(Temp, BaseReg, Temp), + I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0), + Stream1 = StreamModule:append(Stream0, <>), + State#state{stream = Stream1}; +move_to_array_element( + State0, + Value, + BaseReg, + IndexReg, + Offset +) -> + {State1, ValueReg} = copy_to_native_register(State0, Value), + [Temp | _] = State1#state.available_regs, + I1 = jit_riscv32_asm:addi(Temp, IndexReg, Offset), + I2 = jit_riscv32_asm:slli(Temp, Temp, 2), + I3 = jit_riscv32_asm:add(Temp, BaseReg, Temp), + I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0), + Stream1 = (State1#state.stream_module):append( + State1#state.stream, <> + ), + State2 = State1#state{stream = Stream1}, + free_native_register(State2, ValueReg). + +-spec move_to_native_register(state(), value() | cp) -> {state(), riscv32_register()}. 
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State,
+    cp
+) ->
+    % cp: load the word found at the ?CP slot into the first available
+    % register, which then moves to the used list.
+    {BaseReg, Off} = ?CP,
+    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
+move_to_native_register(State, Reg) when is_atom(Reg) ->
+    % Already a native register: nothing to emit.
+    {State, Reg};
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}
+) when is_atom(Reg) ->
+    % Dereference in place: Reg is overwritten with the word it points to.
+    I1 = jit_riscv32_asm:lw(Reg, Reg, 0),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1}, Reg};
+move_to_native_register(
+    #state{
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State0,
+    Imm
+) when
+    is_integer(Imm)
+->
+    % Immediate: reserve a register, then let the 3-arity variant load it.
+    State1 = State0#state{used_regs = [Reg | Used], available_regs = AvailT},
+    {move_to_native_register(State1, Imm, Reg), Reg};
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State,
+    {x_reg, extra}
+) ->
+    % The "extra" x register lives in slot ?MAX_REG.
+    {BaseReg, Off} = ?X_REG(?MAX_REG),
+    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State,
+    {x_reg, X}
+) when
+    X < ?MAX_REG
+->
+    {BaseReg, Offset} = ?X_REG(X),
+    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Offset),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State,
+    {y_reg, Y}
+) ->
+    % The remaining available registers are offered to ldr_y_reg as scratch.
+    Code = ldr_y_reg(Reg, Y, AvailT),
+    Stream1 = StreamModule:append(Stream0, Code),
+    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg};
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [RegA, RegB | AvailT],
+        used_regs = Used
+    } = State,
+    {fp_reg, F}
+) ->
+    % A floating point register takes two native registers. RegB first holds
+    % the base loaded from ?FP_REGS; the word at F * 8 goes to RegA and the
+    % word at F * 8 + 4 goes to RegB. RegB must be loaded last because the
+    % load overwrites the base address it holds.
+    {BaseReg, Off} = ?FP_REGS,
+    I1 = jit_riscv32_asm:lw(RegB, BaseReg, Off),
+    I2 = jit_riscv32_asm:lw(RegA, RegB, F * 8),
+    I3 = jit_riscv32_asm:lw(RegB, RegB, F * 8 + 4),
+    Code = <<I1/binary, I2/binary, I3/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    {
+        State#state{stream = Stream1, available_regs = AvailT, used_regs = [RegB, RegA | Used]},
+        {fp, RegA, RegB}
+    }.
+
+%% Move a value into a caller-chosen destination register. Unlike the
+%% 2-arity variant, this one never touches available_regs/used_regs.
+-spec move_to_native_register(state(), value(), riscv32_register()) -> state().
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0} = State, RegSrc, RegDst
+) when is_atom(RegSrc) ->
+    I = jit_riscv32_asm:mv(RegDst, RegSrc),
+    Stream1 = StreamModule:append(Stream0, I),
+    State#state{stream = Stream1};
+move_to_native_register(State, ValSrc, RegDst) when is_integer(ValSrc) ->
+    mov_immediate(State, RegDst, ValSrc);
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst
+) when ?IS_GPR(Reg) ->
+    I1 = jit_riscv32_asm:lw(RegDst, Reg, 0),
+    Stream1 = StreamModule:append(Stream0, I1),
+    State#state{stream = Stream1};
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, extra}, RegDst
+) ->
+    {BaseReg, Off} = ?X_REG(?MAX_REG),
+    I1 = jit_riscv32_asm:lw(RegDst, BaseReg, Off),
+    Stream1 = StreamModule:append(Stream0, I1),
+    State#state{stream = Stream1};
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, RegDst
+) when
+    X < ?MAX_REG
+->
+    {XReg, X_REGOffset} = ?X_REG(X),
+    I1 = jit_riscv32_asm:lw(RegDst, XReg, X_REGOffset),
+    Stream1 = StreamModule:append(Stream0, I1),
+    State#state{stream = Stream1};
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = AT} = State,
+    {y_reg, Y},
+    RegDst
+) ->
+    Code = ldr_y_reg(RegDst, Y, AT),
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1};
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0
+    } = State,
+    {fp_reg, F},
+    {fp, RegA, RegB}
+) ->
+    % Same three-load sequence as the 2-arity fp_reg clause, into the
+    % register pair chosen by the caller.
+    {BaseReg, Off} = ?FP_REGS,
+    I1 = jit_riscv32_asm:lw(RegB, BaseReg, Off),
+    I2 = jit_riscv32_asm:lw(RegA, RegB, F * 8),
+    I3 = jit_riscv32_asm:lw(RegB, RegB, F * 8 + 4),
+    Code = <<I1/binary, I2/binary, I3/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1}.
+
+%% Like move_to_native_register/2, but a value that already lives in a
+%% register (or is a pointer held in one) is copied to a freshly reserved
+%% register, so the source register is left untouched.
+-spec copy_to_native_register(state(), value()) -> {state(), riscv32_register()}.
+copy_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [SaveReg | AvailT],
+        used_regs = Used
+    } = State,
+    Reg
+) when is_atom(Reg) ->
+    I1 = jit_riscv32_asm:mv(SaveReg, Reg),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg};
+copy_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [SaveReg | AvailT],
+        used_regs = Used
+    } = State,
+    {ptr, Reg}
+) when is_atom(Reg) ->
+    I1 = jit_riscv32_asm:lw(SaveReg, Reg, 0),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg};
+copy_to_native_register(State, Reg) ->
+    move_to_native_register(State, Reg).
+
+%% Store y register Y into the ?CP slot. Reg is only scratch here: the
+%% register lists in the returned state are unchanged.
+move_to_cp(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | AvailT]} = State,
+    {y_reg, Y}
+) ->
+    I1 = ldr_y_reg(Reg, Y, AvailT),
+    {BaseReg, Off} = ?CP,
+    I2 = jit_riscv32_asm:sw(BaseReg, Reg, Off),
+    Code = <<I1/binary, I2/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1}.
+
+%% Advance the word stored at the ?Y_REGS slot by Offset words (Offset * 4
+%% bytes), using the first available register as scratch.
+%% NOTE(review): Offset * 4 is passed straight to addi; presumably callers
+%% keep it within the range addi can encode.
+increment_sp(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State,
+    Offset
+) ->
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off),
+    I2 = jit_riscv32_asm:addi(Reg, Reg, Offset * 4),
+    I3 = jit_riscv32_asm:sw(BaseReg, Reg, Off),
+    Code = <<I1/binary, I2/binary, I3/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1}.
+
+%% Store the address of Label as the continuation in jit_state. The address
+%% is not known yet, so an 8-byte placeholder is emitted and a relocation is
+%% recorded in branches.
+set_continuation_to_label(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _],
+        branches = Branches
+    } = State,
+    Label
+) ->
+    % Similar to AArch64: use pc_relative_address with a relocation that will be
+    % resolved to point directly to the label's actual address (not the jump table entry)
+    Offset = StreamModule:offset(Stream0),
+    % Emit placeholder for pc_relative_address (auipc + addi)
+    % Reserve 8 bytes (2 x 32-bit instructions) with all-1s placeholder for flash programming
+    % The relocation will replace these with the correct offset
+    I1 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>,
+    Reloc = {Label, Offset, {adr, Temp}},
+    % Store continuation (jit_state is in a1)
+    I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET),
+    Code = <<I1/binary, I2/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1, branches = [Reloc | Branches]}.
+
+%% @doc Set the continuation to a given offset
+%% Return a reference so the offset will be updated with update_branches
+%% This is only used with OP_WAIT_TIMEOUT and the offset is after the current
+%% code and not too far, so a PC-relative address (the {adr, Reg} relocation)
+%% is sufficient.
+set_continuation_to_offset(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _],
+        branches = Branches
+    } = State
+) ->
+    % The target offset is identified by a fresh reference, returned to the
+    % caller alongside the new state.
+    OffsetRef = make_ref(),
+    Offset = StreamModule:offset(Stream0),
+    % Reserve 8 bytes with all-1s placeholder for flash programming
+    I1 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>,
+    Reloc = {OffsetRef, Offset, {adr, Temp}},
+    % Store continuation (jit_state is in a1)
+    I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET),
+    Code = <<I1/binary, I2/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    {State#state{stream = Stream1, branches = [Reloc | Branches]}, OffsetRef}.
+
+%% @doc Implement a continuation entry point.
+%% Nothing is emitted on this backend: the state is returned unchanged.
+-spec continuation_entry_point(#state{}) -> #state{}.
+continuation_entry_point(State) ->
+    State.
+
+%% Load the module index into a newly reserved register: first the module
+%% pointer from jit_state, then the word at offset 0 of that module.
+get_module_index(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailableT],
+        used_regs = UsedRegs0
+    } = State
+) ->
+    % Load module from jit_state (which is in a1)
+    I1 = jit_riscv32_asm:lw(Reg, ?JITSTATE_REG, ?JITSTATE_MODULE_OFFSET),
+    I2 = jit_riscv32_asm:lw(Reg, Reg, 0),
+    Code = <<I1/binary, I2/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    {
+        State#state{
+            stream = Stream1,
+            available_regs = AvailableT,
+            used_regs = [Reg | UsedRegs0]
+        },
+        Reg
+    }.
+
+%% @doc Perform an AND of a register with an immediate.
+%% JIT currently calls this with two values: ?TERM_PRIMARY_CLEAR_MASK (-4) to
+%% clear bits and ?TERM_BOXED_TAG_MASK (0x3F). No literal pool is needed:
+%% masks are materialized with mov_immediate, and RISC-V has no bic-style
+%% instruction, so small negative masks use not + and.
+and_(#state{stream_module = StreamModule, stream = Stream0} = State0, {free, Reg}, 16#FFFFFF) ->
+    % Mask 16#FFFFFF: shift left then right by 8 to clear the top 8 bits,
+    % without needing a temporary register.
+    I1 = jit_riscv32_asm:slli(Reg, Reg, 8),
+    I2 = jit_riscv32_asm:srli(Reg, Reg, 8),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    {State0#state{stream = Stream1}, Reg};
+and_(
+    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
+    {free, Reg},
+    Val
+) when Val < 0 andalso Val >= -256 ->
+    % Small negative mask: load its complement, invert it back, then and.
+    % Temp is hidden from mov_immediate while in use and given back afterwards.
+    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)),
+    Stream1 = State1#state.stream,
+    % RISC-V doesn't have bics, use not + and
+    I1 = jit_riscv32_asm:not_(Temp, Temp),
+    I2 = jit_riscv32_asm:and_(Reg, Reg, Temp),
+    Stream2 = StreamModule:append(Stream1, <<I1/binary, I2/binary>>),
+    {State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg};
+and_(
+    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
+    {free, Reg},
+    Val
+) ->
+    % General mask with a scratch register available.
+    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
+    Stream1 = State1#state.stream,
+    I = jit_riscv32_asm:and_(Reg, Reg, Temp),
+    Stream2 = StreamModule:append(Stream1, I),
+    {State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg};
+and_(
+    #state{stream_module = StreamModule, available_regs = []} = State0,
+    {free, Reg},
+    Val
+) when Val < 0 andalso Val >= -256 ->
+    % No available registers: borrow a0 as temp, parking its value in ?IP_REG
+    % (t3 according to the original comments).
+    % NOTE(review): this is only correct when Reg is not a0 - confirm callers.
+    Stream0 = State0#state.stream,
+    % Save a0
+    Save = jit_riscv32_asm:mv(?IP_REG, a0),
+    Stream1 = StreamModule:append(Stream0, Save),
+    % Load immediate value into a0
+    State1 = mov_immediate(State0#state{stream = Stream1}, a0, bnot (Val)),
+    Stream2 = State1#state.stream,
+    % Clear the bits (RISC-V: not + and)
+    I1 = jit_riscv32_asm:not_(a0, a0),
+    I2 = jit_riscv32_asm:and_(Reg, Reg, a0),
+    Stream3 = StreamModule:append(Stream2, <<I1/binary, I2/binary>>),
+    % Restore a0
+    Restore = jit_riscv32_asm:mv(a0, ?IP_REG),
+    Stream4 = StreamModule:append(Stream3, Restore),
+    {State0#state{stream = Stream4}, Reg};
+and_(
+    #state{stream_module = StreamModule, available_regs = []} = State0,
+    {free, Reg},
+    Val
+) ->
+    % No available registers: borrow a0 as temp, parking its value in ?IP_REG.
+    Stream0 = State0#state.stream,
+    % Save a0
+    Save = jit_riscv32_asm:mv(?IP_REG, a0),
+    Stream1 = StreamModule:append(Stream0, Save),
+    % Load immediate value into a0
+    State1 = mov_immediate(State0#state{stream = Stream1}, a0, Val),
+    Stream2 = State1#state.stream,
+    % Perform the and
+    I = jit_riscv32_asm:and_(Reg, Reg, a0),
+    Stream3 = StreamModule:append(Stream2, I),
+    % Restore a0
+    Restore = jit_riscv32_asm:mv(a0, ?IP_REG),
+    Stream4 = StreamModule:append(Stream3, Restore),
+    {State0#state{stream = Stream4}, Reg};
+and_(
+    #state{stream_module = StreamModule, available_regs = [ResultReg | AT], used_regs = UR} =
+        State0,
+    Reg,
+    ?TERM_PRIMARY_CLEAR_MASK
+) ->
+    % Source register is not free: write the masked value to a new register
+    % and leave Reg intact.
+    I = jit_riscv32_asm:andi(ResultReg, Reg, -4),
+    Stream1 = StreamModule:append(State0#state.stream, I),
+    {State0#state{stream = Stream1, available_regs = AT, used_regs = [ResultReg | UR]}, ResultReg}.
+
+%% OR Reg in place with an immediate loaded into a scratch register. The
+%% scratch register is back in available_regs in the returned state.
+or_(
+    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
+    Reg,
+    Val
+) ->
+    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
+    Stream1 = State1#state.stream,
+    I = jit_riscv32_asm:or_(Reg, Temp),
+    Stream2 = StreamModule:append(Stream1, I),
+    State1#state{available_regs = [Temp | AT], stream = Stream2}.
+
+%% Add Val to Reg in place. Val is either a constant or a register name.
+add(#state{stream_module = Mod, stream = Out} = St, Reg, Val) when
+    Val >= 0 andalso Val =< 255
+->
+    % Small non-negative constant: a single addi does the job.
+    St#state{stream = Mod:append(Out, jit_riscv32_asm:addi(Reg, Reg, Val))};
+add(#state{stream_module = Mod, stream = Out} = St, Reg, Val) when
+    is_atom(Val)
+->
+    % Val names a register.
+    St#state{stream = Mod:append(Out, jit_riscv32_asm:add(Reg, Reg, Val))};
+add(#state{stream_module = Mod, available_regs = [Scratch | Rest]} = St0, Reg, Val) ->
+    % Any other constant goes through a scratch register, which is hidden
+    % from mov_immediate while in use and given back afterwards.
+    St1 = mov_immediate(St0#state{available_regs = Rest}, Scratch, Val),
+    AddInstr = jit_riscv32_asm:add(Reg, Reg, Scratch),
+    St1#state{
+        available_regs = [Scratch | Rest],
+        stream = Mod:append(St1#state.stream, AddInstr)
+    }.
+
+%% Load the constant Val into Reg.
+%% According to the original comments, li is a single instruction when Val
+%% fits a 12-bit signed immediate and lui + addi otherwise, so no literal
+%% pool is needed either way.
+mov_immediate(#state{stream_module = Mod, stream = Out} = St, Reg, Val) ->
+    St#state{stream = Mod:append(Out, jit_riscv32_asm:li(Reg, Val))}.
+
+%% Subtract Val from Reg in place. Val is either a constant or a register name.
+sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when
+    Val >= 0 andalso Val =< 255
+->
+    % Small constant: add its negation with a single addi.
+    I1 = jit_riscv32_asm:addi(Reg, Reg, -Val),
+    Stream1 = StreamModule:append(Stream0, I1),
+    State#state{stream = Stream1};
+sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when
+    is_atom(Val)
+->
+    I = jit_riscv32_asm:sub(Reg, Reg, Val),
+    Stream1 = StreamModule:append(Stream0, I),
+    State#state{stream = Stream1};
+sub(#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val) ->
+    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
+    Stream1 = State1#state.stream,
+    I = jit_riscv32_asm:sub(Reg, Reg, Temp),
+    Stream2 = StreamModule:append(Stream1, I),
+    State1#state{available_regs = [Temp | AT], stream = Stream2}.
+
+%% Multiply Reg in place by a constant. Small constants are strength-reduced
+%% to shifts and adds/subs; anything else uses the mul instruction.
+mul(State, _Reg, 1) ->
+    State;
+mul(State, Reg, 2) ->
+    shift_left(State, Reg, 1);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 3) ->
+    % 3x = 2x + x
+    I1 = jit_riscv32_asm:slli(Temp, Reg, 1),
+    I2 = jit_riscv32_asm:add(Reg, Temp, Reg),
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State, Reg, 4) ->
+    shift_left(State, Reg, 2);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 5) ->
+    % 5x = 4x + x
+    I1 = jit_riscv32_asm:slli(Temp, Reg, 2),
+    I2 = jit_riscv32_asm:add(Reg, Temp, Reg),
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State0, Reg, 6) ->
+    State1 = mul(State0, Reg, 3),
+    mul(State1, Reg, 2);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 7) ->
+    % 7x = 8x - x
+    I1 = jit_riscv32_asm:slli(Temp, Reg, 3),
+    I2 = jit_riscv32_asm:sub(Reg, Temp, Reg),
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State, Reg, 8) ->
+    shift_left(State, Reg, 3);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 9) ->
+    % 9x = 8x + x
+    I1 = jit_riscv32_asm:slli(Temp, Reg, 3),
+    I2 = jit_riscv32_asm:add(Reg, Temp, Reg),
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State0, Reg, 10) ->
+    State1 = mul(State0, Reg, 5),
+    mul(State1, Reg, 2);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 15) ->
+    % 15x = 16x - x
+    I1 = jit_riscv32_asm:slli(Temp, Reg, 4),
+    I2 = jit_riscv32_asm:sub(Reg, Temp, Reg),
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State, Reg, 16) ->
+    shift_left(State, Reg, 4);
+mul(State, Reg, 32) ->
+    shift_left(State, Reg, 5);
+mul(State, Reg, 64) ->
+    shift_left(State, Reg, 6);
+mul(
+    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
+    Reg,
+    Val
+) ->
+    % Generic case: load the constant into a scratch register and use the
+    % mul instruction (listed under the M extension in jit_riscv32_asm).
+    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
+    Stream1 = State1#state.stream,
+    I = jit_riscv32_asm:mul(Reg, Reg, Temp),
+    Stream2 = StreamModule:append(Stream1, I),
+    State1#state{stream = Stream2, available_regs = [Temp | State1#state.available_regs]}.
+
+%%
+%% Analysis of AArch64 pattern and RISC-V32 implementation:
+%%
+%% AArch64 layout (from call_ext_only_test):
+%%   0x0-0x8:   Decrement reductions, store back
+%%   0xc:       b.ne 0x20 ; Branch if reductions != 0 to continuation
+%%   0x10-0x1c: adr/str/ldr/br sequence for scheduling next process
+%%   0x20:      [CONTINUATION POINT] - Actual function starts here
+%%
+%% RISC-V32 implementation (no prolog/epilog needed due to 32 registers):
+%%   0x0-0x8:   Decrement reductions, store back
+%%   0xc:       bne continuation ; Branch if reductions != 0 to continuation
+%%   0x10-0x?:  adr/sw/ldr/jalr sequence for scheduling next process
+%%   continuation: [actual function body]
+%%
+%% Key insight: With 32 registers, RISC-V32 doesn't need prolog/epilog like ARM Thumb.
+%% When reductions != 0, we branch directly to continue execution.
+%% When reductions == 0, we schedule the next process, and resume at the continuation point.
+%%
+-spec decrement_reductions_and_maybe_schedule_next(state()) -> state().
+decrement_reductions_and_maybe_schedule_next(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0
+) ->
+    % Load reduction count
+    I1 = jit_riscv32_asm:lw(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
+    % Decrement reduction count
+    I2 = jit_riscv32_asm:addi(Temp, Temp, -1),
+    % Store back the decremented value
+    I3 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    BNEOffset = StreamModule:offset(Stream1),
+    % Branch if reduction count is not zero (4-byte placeholder, rewritten below)
+    I4 = <<16#FFFFFFFF:32/little>>,
+    % Set continuation to the next instruction
+    ADROffset = BNEOffset + byte_size(I4),
+    % Use 8-byte placeholder (2 words of 0xFFFFFFFF) for pc_relative_address
+    % This ensures we can always rewrite with either auipc alone (4 bytes) or auipc+addi (8 bytes)
+    I5 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>,
+    I6 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET),
+    % Append the instructions to the stream
+    Stream2 = StreamModule:append(Stream1, <<I4/binary, I5/binary, I6/binary>>),
+    State1 = State0#state{stream = Stream2},
+    State2 = call_primitive_last(State1, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]),
+    % Rewrite the branch and adr instructions now that the continuation
+    % offset (the end of the scheduling sequence) is known
+    #state{stream = Stream3} = State2,
+    NewOffset = StreamModule:offset(Stream3),
+    % NOTE(review): the placeholder is 4 bytes, so this assumes bne always
+    % encodes to 4 bytes here - confirm against jit_riscv32_asm:bne/3.
+    NewI4 = jit_riscv32_asm:bne(Temp, zero, NewOffset - BNEOffset),
+    NewI5Offset = NewOffset - ADROffset,
+    % Generate the new pc_relative_address instruction, padding with NOP if needed
+    % so that exactly the 8 reserved bytes are rewritten
+    NewI5 =
+        case pc_relative_address(Temp, NewI5Offset) of
+            I when byte_size(I) =:= 4 ->
+                % Only auipc, pad with NOP (4 bytes)
+                <<I/binary, (jit_riscv32_asm:nop())/binary>>;
+            I when byte_size(I) =:= 6 ->
+                % auipc + c.addi, pad with c.nop (2 bytes)
+                <<I/binary, (jit_riscv32_asm:c_nop())/binary>>;
+            I when byte_size(I) =:= 8 ->
+                % auipc + addi, no padding needed
+                I
+        end,
+    Stream4 = StreamModule:replace(
+        Stream3, BNEOffset, <<NewI4/binary, NewI5/binary>>
+    ),
+    merge_used_regs(State2#state{stream = Stream4}, State1#state.used_regs).
+
+%% Set cp to the code following this sequence, then either jump to Label or
+%% schedule the next process, depending on the remaining reductions.
+-spec call_or_schedule_next(state(), non_neg_integer()) -> state().
+call_or_schedule_next(State0, Label) ->
+    {State1, RewriteOffset, TempReg} = set_cp(State0),
+    State2 = call_only_or_schedule_next(State1, Label),
+    rewrite_cp_offset(State2, RewriteOffset, TempReg).
+
+%% Decrement the reduction count. If it is still non-zero, branch to Label;
+%% otherwise set the continuation to Label and call the schedule-next primitive.
+call_only_or_schedule_next(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _]
+    } = State0,
+    Label
+) ->
+    % Load reduction count (jit_state is in a1)
+    I1 = jit_riscv32_asm:lw(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
+    % Decrement reduction count
+    I2 = jit_riscv32_asm:addi(Temp, Temp, -1),
+    % Store back the decremented value
+    I3 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    % Use trampoline technique: branch if zero (eq) to skip over the long branch
+    % If not zero, we want to continue execution at Label
+    % If zero, we want to fall through to scheduling code
+
+    % Look up label once to avoid duplicate lookup in helper
+    LabelLookupResult = lists:keyfind(Label, 1, State0#state.labels),
+
+    BccOffset = StreamModule:offset(Stream1),
+
+    State4 =
+        case LabelLookupResult of
+            {Label, LabelOffset} when
+                LabelOffset - BccOffset >= -4096 andalso
+                    LabelOffset - BccOffset =< 4094 andalso
+                    ((LabelOffset - BccOffset) rem 2) =:= 0
+            ->
+                % Label is known and near: use direct conditional branch
+                % (RISC-V has ±4KB range)
+
+                % Branch if NOT zero (temp != 0)
+                I4 = jit_riscv32_asm:bne(Temp, zero, LabelOffset - BccOffset),
+                Stream2 = StreamModule:append(Stream1, I4),
+                State0#state{stream = Stream2};
+            _ ->
+                % Label is unknown (lookup returned false) or too far: use
+                % trampoline with helper. The lookup result is handed over
+                % as is, so the helper sees false for unknown labels.
+
+                % RISC-V branch is 4 bytes
+                FarSeqOffset = BccOffset + 4,
+                {State1, FarCodeBlock} = branch_to_label_code(
+                    State0, FarSeqOffset, Label, LabelLookupResult
+                ),
+                FarSeqSize = byte_size(FarCodeBlock),
+                % Skip over the far branch sequence if zero (temp == 0)
+                I4 = jit_riscv32_asm:beq(Temp, zero, FarSeqSize + 4),
+                Stream2 = StreamModule:append(Stream1, I4),
+                Stream3 = StreamModule:append(Stream2, FarCodeBlock),
+                State1#state{stream = Stream3}
+        end,
+    State5 = set_continuation_to_label(State4, Label),
+    call_primitive_last(State5, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]).
+
+%% Set cp to the code following the call, tail-call the primitive, then patch
+%% the reserved cp offset now that the return offset is known.
+call_primitive_with_cp(State0, Primitive, Args) ->
+    {State1, RewriteOffset, TempReg} = set_cp(State0),
+    State2 = call_primitive_last(State1, Primitive, Args),
+    rewrite_cp_offset(State2, RewriteOffset, TempReg).
+
+-spec set_cp(state()) -> {state(), non_neg_integer(), riscv32_register()}.
+set_cp(#state{available_regs = [TempReg | AvailT], used_regs = UsedRegs} = State0) ->
+    % Emits: cp = (module_index << 24) | offset_value, where the offset load
+    % is an 8-byte placeholder patched later by rewrite_cp_offset/3.
+    % Returns the stream offset of that placeholder and the register it loads.
+
+    % Reserve a temporary register for the offset BEFORE calling get_module_index
+    % to avoid running out of available registers
+    State0b = State0#state{available_regs = AvailT, used_regs = [TempReg | UsedRegs]},
+    % get module index (dynamically)
+    {
+        #state{stream_module = StreamModule, stream = Stream0} = State1,
+        Reg
+    } = get_module_index(
+        State0b
+    ),
+
+    Offset = StreamModule:offset(Stream0),
+    % build cp with module_index << 24
+    I1 = jit_riscv32_asm:slli(Reg, Reg, 24),
+    % Reserve space for offset load instruction
+    % li can generate 1 instruction (4 bytes) for small immediates (< 2048)
+    % or 2 instructions (8 bytes) for large immediates
+    % Since we don't know the final CP value yet (it depends on code size),
+    % we must always reserve 2 instructions (8 bytes) to be safe
+    % The final CP value is (final_offset << 2), and final_offset is unknown
+    % Use 0xFFFFFFFF placeholders for flash compatibility (can only flip 1->0)
+    I2 = <<16#FFFFFFFF:32/little>>,
+    I3 = <<16#FFFFFFFF:32/little>>,
+    % The placeholder starts right after I1, whatever size I1 encodes to
+    MOVOffset = Offset + byte_size(I1),
+    % OR the module index with the offset (loaded in temp register)
+    I4 = jit_riscv32_asm:or_(Reg, TempReg),
+    {BaseReg, Off} = ?CP,
+    I5 = jit_riscv32_asm:sw(BaseReg, Reg, Off),
+    Code = <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State2 = State1#state{stream = Stream1},
+    State3 = free_native_register(State2, Reg),
+    State4 = free_native_register(State3, TempReg),
+    {State4, MOVOffset, TempReg}.
+
+-spec rewrite_cp_offset(state(), non_neg_integer(), riscv32_register()) -> state().
+rewrite_cp_offset(
+    #state{stream_module = StreamModule, stream = Stream0, offset = CodeOffset} = State0,
+    RewriteOffset,
+    TempReg
+) ->
+    % Patch the 8-byte placeholder reserved by set_cp/1 with a load of the
+    % final cp value: the current offset relative to CodeOffset, shifted left by 2.
+    NewOffset = StreamModule:offset(Stream0) - CodeOffset,
+    CPValue = NewOffset bsl 2,
+    NewMoveInstr = jit_riscv32_asm:li(TempReg, CPValue),
+    % We reserved 8 bytes (2 instructions) for the CP value
+    % Pad with NOP if needed to maintain alignment
+    % NOTE(review): the padding expressions were lost in extraction and are
+    % reconstructed here; this assumes nop() is 4 bytes and c_nop() is 2 bytes,
+    % and the order of the two pads in the 2-byte case is a guess.
+    PaddedInstr =
+        case byte_size(NewMoveInstr) of
+            2 ->
+                <<NewMoveInstr/binary, (jit_riscv32_asm:c_nop())/binary,
+                    (jit_riscv32_asm:nop())/binary>>;
+            4 ->
+                <<NewMoveInstr/binary, (jit_riscv32_asm:nop())/binary>>;
+            6 ->
+                <<NewMoveInstr/binary, (jit_riscv32_asm:c_nop())/binary>>;
+            8 ->
+                NewMoveInstr
+        end,
+    Stream1 = StreamModule:replace(Stream0, RewriteOffset, PaddedInstr),
+    State0#state{stream = Stream1}.
+
+%% Store TermReg at the ?BS slot and reset the ?BS_OFFSET slot to 0, using
+%% the first available register as scratch for the zero.
+set_bs(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
+    TermReg
+) ->
+    {BaseReg1, Off1} = ?BS,
+    I1 = jit_riscv32_asm:sw(BaseReg1, TermReg, Off1),
+    I2 = jit_riscv32_asm:li(Temp, 0),
+    {BaseReg2, Off2} = ?BS_OFFSET,
+    I3 = jit_riscv32_asm:sw(BaseReg2, Temp, Off2),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    State0#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @param State current state
+%% @param SortedLines line information, sorted by offset
+%% @doc Build labels and line tables and encode a function that returns it.
+%% In this case, the function returns the effective address of what immediately
+%% follows.
+%% @end
+%% @return New state
+%%-----------------------------------------------------------------------------
+return_labels_and_lines(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        labels = Labels
+    } = State,
+    SortedLines
+) ->
+    % Only integer labels are emitted (reference labels are dropped), sorted
+    % by offset.
+    SortedLabels = lists:keysort(2, [
+        {Label, LabelOffset}
+     || {Label, LabelOffset} <- Labels, is_integer(Label)
+    ]),
+
+    I2 = jit_riscv32_asm:ret(),
+    % Assume total size is 10 bytes (8-byte I1 + 2-byte c.ret)
+    % If actual is 8 bytes (6-byte I1 + 2-byte c.ret), we'll pad with 2 bytes
+    % Either way the tables start 10 bytes after the prologue's first byte,
+    % which is the address the prologue returns in a0.
+    I1 = pc_relative_address(a0, 10),
+    Prologue = <<I1/binary, I2/binary>>,
+    % NOTE(review): the binary expressions from here on were lost in
+    % extraction. The padding value and the 16-bit/32-bit field widths below
+    % are reconstructed, not visible in the source - confirm against upstream
+    % and the table reader before relying on them.
+    ProloguePadded =
+        case byte_size(Prologue) of
+            10 -> Prologue;
+            % 2-byte padding
+            8 -> <<Prologue/binary, 0:16>>
+        end,
+    LabelsTable = <<<<Label:16, Offset:32>> || {Label, Offset} <- SortedLabels>>,
+    LinesTable = <<<<Line:16, Offset:32>> || {Line, Offset} <- SortedLines>>,
+    Stream1 = StreamModule:append(
+        Stream0,
+        <<ProloguePadded/binary, (length(SortedLabels)):16, LabelsTable/binary,
+            (length(SortedLines)):16, LinesTable/binary>>
+    ),
+    State#state{stream = Stream1}.
+
+%% @doc Generate PC-relative address calculation using AUIPC + ADDI
+%% This replaces the ARM-style 'adr' pseudo-instruction with native RISC-V instructions
+-spec pc_relative_address(riscv32_register(), integer()) -> binary().
+pc_relative_address(Rd, 0) ->
+    % Simple case: just get current PC
+    jit_riscv32_asm:auipc(Rd, 0);
+pc_relative_address(Rd, Offset) ->
+    % PC-relative address calculation
+    % Split offset into upper 20 bits and lower 12 bits
+    % AUIPC can represent offsets in range: (-524288 << 12) to (524287 << 12)
+    % Combined with ADDI: (-524288 << 12) - 2048 to (524287 << 12) + 2047
+    Lower = Offset band 16#FFF,
+    % Sign extend lower 12 bits
+    LowerSigned =
+        if
+            Lower >= 16#800 -> Lower - 16#1000;
+            true -> Lower
+        end,
+    % Compute upper 20 bits, adjusting if lower is negative
+    % (bsr on Erlang integers preserves the sign). The +1 compensates for the
+    % negative lower part that addi will add back.
+    Upper =
+        if
+            LowerSigned < 0 ->
+                (Offset bsr 12) + 1;
+            true ->
+                Offset bsr 12
+        end,
+    % Validate that Upper is in valid range for AUIPC
+    if
+        Upper < -16#80000; Upper > 16#7FFFF ->
+            error({offset_out_of_range, Offset, Upper, -16#80000, 16#7FFFF});
+        true ->
+            ok
+    end,
+    % auipc is always emitted; addi only when there are lower bits to add.
+    % The result size therefore depends on the offset and on how addi is
+    % encoded; callers pad it to a fixed size where needed.
+    AuipcInstr = jit_riscv32_asm:auipc(Rd, Upper),
+    case LowerSigned of
+        0 ->
+            % Only upper bits needed (or zero offset)
+            AuipcInstr;
+        _ ->
+            AddiInstr = jit_riscv32_asm:addi(Rd, Rd, LowerSigned),
+            <<AuipcInstr/binary, AddiInstr/binary>>
+    end.
+
+%% Helper function to generate str instruction with y_reg offset, handling large offsets
+%% Returns the code that stores SrcReg into y register Y. The y registers
+%% base is first loaded from the ?Y_REGS slot into a temporary register.
+str_y_reg(SrcReg, Y, TempReg, _AvailableRegs) when Y * 4 =< 124 ->
+    % Small offset - use immediate addressing
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(TempReg, BaseReg, Off),
+    I2 = jit_riscv32_asm:sw(TempReg, SrcReg, Y * 4),
+    <<I1/binary, I2/binary>>;
+str_y_reg(SrcReg, Y, TempReg1, [TempReg2 | _]) ->
+    % Large offset - use register arithmetic with second available register
+    Offset = Y * 4,
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(TempReg1, BaseReg, Off),
+    I2 = jit_riscv32_asm:li(TempReg2, Offset),
+    I3 = jit_riscv32_asm:add(TempReg2, TempReg2, TempReg1),
+    I4 = jit_riscv32_asm:sw(TempReg2, SrcReg, 0),
+    <<I1/binary, I2/binary, I3/binary, I4/binary>>;
+str_y_reg(SrcReg, Y, TempReg1, []) ->
+    % Large offset - no additional registers available, use IP_REG as second temp
+    % IP_REG keeps the base while TempReg1 is reused for the offset
+    Offset = Y * 4,
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(TempReg1, BaseReg, Off),
+    I2 = jit_riscv32_asm:mv(?IP_REG, TempReg1),
+    I3 = jit_riscv32_asm:li(TempReg1, Offset),
+    I4 = jit_riscv32_asm:add(TempReg1, TempReg1, ?IP_REG),
+    I5 = jit_riscv32_asm:sw(TempReg1, SrcReg, 0),
+    <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>.
+
+%% Helper function to generate ldr instruction with y_reg offset, handling large offsets
+%% Returns the code that loads y register Y into DstReg. The third argument
+%% lists the registers that may be used as scratch; it can be empty.
+ldr_y_reg(DstReg, Y, [TempReg | _]) when Y * 4 =< 124 ->
+    % Small offset - use immediate addressing
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(TempReg, BaseReg, Off),
+    I2 = jit_riscv32_asm:lw(DstReg, TempReg, Y * 4),
+    <<I1/binary, I2/binary>>;
+ldr_y_reg(DstReg, Y, [TempReg | _]) ->
+    % Large offset - use DstReg as second temp register for arithmetic
+    Offset = Y * 4,
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(TempReg, BaseReg, Off),
+    I2 = jit_riscv32_asm:li(DstReg, Offset),
+    I3 = jit_riscv32_asm:add(DstReg, DstReg, TempReg),
+    I4 = jit_riscv32_asm:lw(DstReg, DstReg, 0),
+    <<I1/binary, I2/binary, I3/binary, I4/binary>>;
+ldr_y_reg(DstReg, Y, []) when Y * 4 =< 124 ->
+    % Small offset, no registers available - use DstReg as temp
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(DstReg, BaseReg, Off),
+    I2 = jit_riscv32_asm:lw(DstReg, DstReg, Y * 4),
+    <<I1/binary, I2/binary>>;
+ldr_y_reg(DstReg, Y, []) ->
+    % Large offset, no registers available - use IP_REG as temp register
+    % IP_REG keeps the base while DstReg is reused for the offset
+    Offset = Y * 4,
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(DstReg, BaseReg, Off),
+    I2 = jit_riscv32_asm:mv(?IP_REG, DstReg),
+    I3 = jit_riscv32_asm:li(DstReg, Offset),
+    I4 = jit_riscv32_asm:add(DstReg, DstReg, ?IP_REG),
+    I5 = jit_riscv32_asm:lw(DstReg, DstReg, 0),
+    <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>.
+
+%% Move Reg from the used list back to the available list. Crashes with
+%% badmatch if Reg is not currently marked as used.
+free_reg(AvailableRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) ->
+    AvailableRegs1 = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []),
+    true = lists:member(Reg, UsedRegs0),
+    UsedRegs1 = lists:delete(Reg, UsedRegs0),
+    {AvailableRegs1, UsedRegs1}.
+
+%% Insert Reg into the available list at the position it has in
+%% ?AVAILABLE_REGS. Walks both lists in parallel; this assumes the available
+%% list is already ordered like ?AVAILABLE_REGS.
+free_reg0([Reg | _SortedT], PrevRegs0, Reg, Acc) ->
+    % Reached Reg's position: everything accumulated goes before it
+    lists:reverse(Acc, [Reg | PrevRegs0]);
+free_reg0([PrevReg | SortedT], [PrevReg | PrevT], Reg, Acc) ->
+    % This register is available and sorts before Reg: keep it
+    free_reg0(SortedT, PrevT, Reg, [PrevReg | Acc]);
+free_reg0([_Other | SortedT], PrevRegs, Reg, Acc) ->
+    % This register is not currently available: skip it
+    free_reg0(SortedT, PrevRegs, Reg, Acc).
+
+%% Map each call argument to the register it involves, or to a marker atom
+%% (imm, jit_state, stack) when it is not tied to an allocatable register.
+args_regs(Args) ->
+    % Clause order matters: the specific atoms must come before the generic
+    % is_atom/1 clause.
+    RegOf = fun
+        ({free, {ptr, Reg}}) -> Reg;
+        ({free, Reg}) when is_atom(Reg) -> Reg;
+        ({free, Imm}) when is_integer(Imm) -> imm;
+        (offset) -> imm;
+        (ctx) -> ?CTX_REG;
+        (jit_state) -> jit_state;
+        (jit_state_tail_call) -> jit_state;
+        (stack) -> stack;
+        (Reg) when is_atom(Reg) -> Reg;
+        (Imm) when is_integer(Imm) -> imm;
+        ({ptr, Reg}) -> Reg;
+        ({x_reg, _}) -> ?CTX_REG;
+        ({y_reg, _}) -> ?CTX_REG;
+        ({fp_reg, _}) -> ?CTX_REG;
+        ({free, {x_reg, _}}) -> ?CTX_REG;
+        ({free, {y_reg, _}}) -> ?CTX_REG;
+        ({free, {fp_reg, _}}) -> ?CTX_REG;
+        ({avm_int64_t, _}) -> imm
+    end,
+    [RegOf(Arg) || Arg <- Args].
+
+%%-----------------------------------------------------------------------------
+%% @doc Add a label at the current offset.
+%% @end
+%% @param State current backend state
+%% @param Label the label number or reference
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add_label(state(), integer() | reference()) -> state().
+add_label(#state{stream_module = Mod, stream = Out} = St, Label) ->
+    add_label(St, Label, Mod:offset(Out)).
+
+%%-----------------------------------------------------------------------------
+%% @doc Add a label at a specific offset
+%% @end
+%% @param State current backend state
+%% @param Label the label number or reference
+%% @param Offset the explicit offset for this label
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add_label(state(), integer() | reference(), integer()) -> state().
+add_label(#state{labels = Known} = St, Label, Offset) ->
+    % Newest label first; earlier entries are kept as is.
+    St#state{labels = [{Label, Offset} | Known]}.
diff --git a/libs/jit/src/jit_riscv32_asm.erl b/libs/jit/src/jit_riscv32_asm.erl
new file mode 100644
index 0000000000..1d4b569620
--- /dev/null
+++ b/libs/jit/src/jit_riscv32_asm.erl
@@ -0,0 +1,1820 @@
+%
+% This file is part of AtomVM.
+% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_riscv32_asm). + +-export([ + % R-type arithmetic and logical instructions + add/3, + sub/3, + and_/3, + or_/2, + or_/3, + xor_/3, + sll/3, + srl/3, + sra/3, + slt/3, + sltu/3, + % I-type immediate instructions + addi/3, + andi/3, + ori/3, + xori/3, + slli/3, + srli/3, + srai/3, + slti/3, + sltiu/3, + % Load instructions + lw/2, + lw/3, + lh/2, + lh/3, + lhu/2, + lhu/3, + lb/2, + lb/3, + lbu/2, + lbu/3, + % Store instructions + sw/2, + sw/3, + sh/2, + sh/3, + sb/2, + sb/3, + % Branch instructions + beq/3, + bne/3, + blt/3, + bge/3, + bltu/3, + bgeu/3, + % Jump instructions + jal/2, + jalr/3, + jalr/2, + % Upper immediate instructions + lui/2, + auipc/2, + % Pseudo-instructions + nop/0, + li/2, + mv/2, + not_/2, + neg/2, + j/1, + jr/1, + ret/0, + call/2, + % M extension (multiply/divide) + mul/3, + % C extension (compressed) - arithmetic/logical + c_add/2, + c_sub/2, + c_and/2, + c_or/2, + c_xor/2, + c_mv/2, + % C extension - immediate instructions + c_addi/2, + c_andi/2, + c_li/2, + c_lui/2, + c_addi16sp/1, + c_addi4spn/2, + % C extension - shift instructions + c_slli/2, + c_srli/2, + c_srai/2, + % C extension - load/store + c_lw/2, + c_sw/2, + c_lwsp/2, + c_swsp/2, + % C extension - branches and jumps + c_beqz/2, + c_bnez/2, + c_j/1, + c_jal/1, + c_jr/1, + c_jalr/1, + % C extension - system instructions + 
c_ebreak/0, + % C extension - pseudo-instructions + c_nop/0 +]). + +-export_type([ + riscv_register/0 +]). + +%% RISC-V 32-bit (RV32I) Assembler +%% +%% This module provides an assembler for the RISC-V 32-bit instruction set. +%% It generates binary machine code for RISC-V instructions following the +%% RV32I base integer instruction set architecture. +%% +%% RISC-V Register Set (32 registers): +%% x0 (zero) - Hardwired zero (reads as 0, writes ignored) +%% x1 (ra) - Return address +%% x2 (sp) - Stack pointer +%% x3 (gp) - Global pointer +%% x4 (tp) - Thread pointer +%% x5 (t0) - Temporary register 0 +%% x6 (t1) - Temporary register 1 +%% x7 (t2) - Temporary register 2 +%% x8 (s0/fp)- Saved register 0 / Frame pointer +%% x9 (s1) - Saved register 1 +%% x10 (a0) - Function argument 0 / Return value 0 +%% x11 (a1) - Function argument 1 / Return value 1 +%% x12 (a2) - Function argument 2 +%% x13 (a3) - Function argument 3 +%% x14 (a4) - Function argument 4 +%% x15 (a5) - Function argument 5 +%% x16 (a6) - Function argument 6 +%% x17 (a7) - Function argument 7 +%% x18 (s2) - Saved register 2 +%% x19 (s3) - Saved register 3 +%% x20 (s4) - Saved register 4 +%% x21 (s5) - Saved register 5 +%% x22 (s6) - Saved register 6 +%% x23 (s7) - Saved register 7 +%% x24 (s8) - Saved register 8 +%% x25 (s9) - Saved register 9 +%% x26 (s10) - Saved register 10 +%% x27 (s11) - Saved register 11 +%% x28 (t3) - Temporary register 3 +%% x29 (t4) - Temporary register 4 +%% x30 (t5) - Temporary register 5 +%% x31 (t6) - Temporary register 6 +%% +%% RISC-V Calling Convention (ILP32): +%% - Arguments: a0-a7 (x10-x17) +%% - Return values: a0-a1 (x10-x11) +%% - Caller-saved: t0-t6, a0-a7 +%% - Callee-saved: s0-s11, sp, ra +%% - Stack grows downward +%% - Stack must be 16-byte aligned at function call boundaries +%% +%% Instruction Encoding: +%% All RV32I instructions are 32 bits (4 bytes). +%% Bit ordering is little-endian within each 32-bit word. 
%%
%% See: RISC-V Instruction Set Manual, Volume I: User-Level ISA
%% https://riscv.org/technical/specifications/
%% https://github.com/riscv/riscv-isa-manual/

%% ABI names accepted for the 32 integer registers (x0-x31).
%% `fp' and `s0' both name x8.
-type riscv_register() ::
    % x0-x4: constant zero and the dedicated link/pointer registers
    zero
    | ra
    | sp
    | gp
    | tp
    % x5-x7 and x28-x31: temporaries
    | t0
    | t1
    | t2
    | t3
    | t4
    | t5
    | t6
    % x8-x9 and x18-x27: saved registers (fp is an alias of s0)
    | s0
    | fp
    | s1
    | s2
    | s3
    | s4
    | s5
    | s6
    | s7
    | s8
    | s9
    | s10
    | s11
    % x10-x17: argument registers
    | a0
    | a1
    | a2
    | a3
    | a4
    | a5
    | a6
    | a7.

%%-----------------------------------------------------------------------------
%% Helper functions
%%-----------------------------------------------------------------------------

%% Map an ABI register name to its architectural index (0-31).
%% Any other term raises function_clause.
-spec reg_to_num(riscv_register()) -> 0..31.
% Hardwired zero and special-purpose registers (x0-x4)
reg_to_num(zero) -> 0;
reg_to_num(ra) -> 1;
reg_to_num(sp) -> 2;
reg_to_num(gp) -> 3;
reg_to_num(tp) -> 4;
% Temporaries (x5-x7, x28-x31)
reg_to_num(t0) -> 5;
reg_to_num(t1) -> 6;
reg_to_num(t2) -> 7;
reg_to_num(t3) -> 28;
reg_to_num(t4) -> 29;
reg_to_num(t5) -> 30;
reg_to_num(t6) -> 31;
% Saved registers (x8-x9, x18-x27); fp shares x8 with s0
reg_to_num(fp) -> 8;
reg_to_num(s0) -> 8;
reg_to_num(s1) -> 9;
reg_to_num(s2) -> 18;
reg_to_num(s3) -> 19;
reg_to_num(s4) -> 20;
reg_to_num(s5) -> 21;
reg_to_num(s6) -> 22;
reg_to_num(s7) -> 23;
reg_to_num(s8) -> 24;
reg_to_num(s9) -> 25;
reg_to_num(s10) -> 26;
reg_to_num(s11) -> 27;
% Argument registers (x10-x17)
reg_to_num(a0) -> 10;
reg_to_num(a1) -> 11;
reg_to_num(a2) -> 12;
reg_to_num(a3) -> 13;
reg_to_num(a4) -> 14;
reg_to_num(a5) -> 15;
reg_to_num(a6) -> 16;
reg_to_num(a7) -> 17.

%%-----------------------------------------------------------------------------
%% R-type instruction encoding
%%-----------------------------------------------------------------------------

%% R-type instruction format:
%% funct7 (7) | rs2 (5) | rs1 (5) | funct3 (3) | rd (5) | opcode (7)
%% Bits: 31-25 24-20 19-15 14-12 11-7 6-0

-spec encode_r_type(
    Opcode :: 0..127,
    Rd :: riscv_register(),
    Funct3 :: 0..7,
    Rs1 :: riscv_register(),
    Rs2 :: riscv_register(),
    Funct7 :: 0..127
) -> binary().
encode_r_type(Opcode, Rd, Funct3, Rs1, Rs2, Funct7) ->
    RdNum = reg_to_num(Rd),
    Rs1Num = reg_to_num(Rs1),
    Rs2Num = reg_to_num(Rs2),
    % Pack the fields from funct7 (bits 31-25) down to the opcode (bits 6-0).
    Instr =
        (Funct7 bsl 25) bor
            (Rs2Num bsl 20) bor
            (Rs1Num bsl 15) bor
            (Funct3 bsl 12) bor
            (RdNum bsl 7) bor
            Opcode,
    % Emit the 32-bit instruction word in little-endian byte order.
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% R-type arithmetic and logical instructions
%%-----------------------------------------------------------------------------

%% ADD - Add
%% rd = rs1 + rs2
%% Emits the 16-bit c.add form when rd == rs1 and neither rd nor rs2 is zero.
-spec add(riscv_register(), riscv_register(), riscv_register()) -> binary().
add(Rd, Rs1, Rs2) when Rd =:= Rs1, Rd =/= zero, Rs2 =/= zero ->
    % Use c.add when rd == rs1 and neither register is zero
    c_add(Rd, Rs2);
add(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0000000
    encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#00).

%% SUB - Subtract
%% rd = rs1 - rs2
%% Emits c.sub when rd == rs1 and both rd and rs2 are in the compressed
%% register set (see is_compressed_reg/1).
-spec sub(riscv_register(), riscv_register(), riscv_register()) -> binary().
sub(Rd, Rs1, Rs2) when Rd =:= Rs1 ->
    case is_compressed_reg(Rd) andalso is_compressed_reg(Rs2) of
        true -> c_sub(Rd, Rs2);
        false -> encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#20)
    end;
sub(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0100000
    encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#20).

%% AND - Bitwise AND
%% rd = rs1 & rs2
%% Emits c.and when rd == rs1 and both rd and rs2 are in the compressed
%% register set.
-spec and_(riscv_register(), riscv_register(), riscv_register()) -> binary().
and_(Rd, Rs1, Rs2) when Rd =:= Rs1 ->
    case is_compressed_reg(Rd) andalso is_compressed_reg(Rs2) of
        true -> c_and(Rd, Rs2);
        false -> encode_r_type(16#33, Rd, 16#7, Rs1, Rs2, 16#00)
    end;
and_(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 111, Funct7: 0000000
    encode_r_type(16#33, Rd, 16#7, Rs1, Rs2, 16#00).

%% OR - Bitwise OR
%% rd = rs1 | rs2
-spec or_(riscv_register(), riscv_register(), riscv_register()) -> binary().
or_(Rd, Rd, Rs2) ->
    % In-place form: c.or is available only when both operands are in the
    % compressed register set.
    case {is_compressed_reg(Rd), is_compressed_reg(Rs2)} of
        {true, true} -> c_or(Rd, Rs2);
        _ -> encode_r_type(2#0110011, Rd, 2#110, Rd, Rs2, 2#0000000)
    end;
or_(Rd, Rs1, Rs2) ->
    % opcode 0x33, funct3 110, funct7 0000000
    encode_r_type(2#0110011, Rd, 2#110, Rs1, Rs2, 2#0000000).

%% OR - Bitwise OR, two-operand shorthand
%% rd = rd | rs
-spec or_(riscv_register(), riscv_register()) -> binary().
or_(Rd, Rs) ->
    or_(Rd, Rd, Rs).

%% XOR - Bitwise exclusive OR
%% rd = rs1 ^ rs2
-spec xor_(riscv_register(), riscv_register(), riscv_register()) -> binary().
xor_(Rd, Rd, Rs2) ->
    % In-place form: c.xor is available only when both operands are in the
    % compressed register set.
    case {is_compressed_reg(Rd), is_compressed_reg(Rs2)} of
        {true, true} -> c_xor(Rd, Rs2);
        _ -> encode_r_type(2#0110011, Rd, 2#100, Rd, Rs2, 2#0000000)
    end;
xor_(Rd, Rs1, Rs2) ->
    % opcode 0x33, funct3 100, funct7 0000000
    encode_r_type(2#0110011, Rd, 2#100, Rs1, Rs2, 2#0000000).

%% SLL - Shift Left Logical
%% rd = rs1 << rs2[4:0]
-spec sll(riscv_register(), riscv_register(), riscv_register()) -> binary().
sll(Rd, Rs1, Rs2) ->
    % opcode 0x33, funct3 001, funct7 0000000
    encode_r_type(2#0110011, Rd, 2#001, Rs1, Rs2, 2#0000000).

%% SRL - Shift Right Logical
%% rd = rs1 >> rs2[4:0] (zero-extend)
-spec srl(riscv_register(), riscv_register(), riscv_register()) -> binary().
srl(Rd, Rs1, Rs2) ->
    % opcode 0x33, funct3 101, funct7 0000000
    encode_r_type(2#0110011, Rd, 2#101, Rs1, Rs2, 2#0000000).

%% SRA - Shift Right Arithmetic
%% rd = rs1 >> rs2[4:0] (sign-extend)
-spec sra(riscv_register(), riscv_register(), riscv_register()) -> binary().
sra(Rd, Rs1, Rs2) ->
    % opcode 0x33, funct3 101, funct7 0100000 (distinguishes SRA from SRL)
    encode_r_type(2#0110011, Rd, 2#101, Rs1, Rs2, 2#0100000).

%% SLT - Set Less Than
%% rd = (rs1 < rs2) ? 1 : 0 (signed)
-spec slt(riscv_register(), riscv_register(), riscv_register()) -> binary().
slt(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 010, Funct7: 0000000
    encode_r_type(16#33, Rd, 16#2, Rs1, Rs2, 16#00).

%% SLTU - Set Less Than Unsigned
%% rd = (rs1 < rs2) ? 1 : 0 (unsigned)
-spec sltu(riscv_register(), riscv_register(), riscv_register()) -> binary().
sltu(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 011, Funct7: 0000000
    encode_r_type(16#33, Rd, 16#3, Rs1, Rs2, 16#00).

%%-----------------------------------------------------------------------------
%% I-type instruction encoding
%%-----------------------------------------------------------------------------

%% I-type instruction format:
%% imm[11:0] (12) | rs1 (5) | funct3 (3) | rd (5) | opcode (7)
%% Bits: 31-20 19-15 14-12 11-7 6-0
%%
%% The immediate is not range-checked here; callers validate it. It is
%% truncated to its low 12 bits (two's complement), so a negative value in
%% [-2048, -1] yields the expected signed field.

-spec encode_i_type(
    Opcode :: 0..127,
    Rd :: riscv_register(),
    Funct3 :: 0..7,
    Rs1 :: riscv_register(),
    Imm :: integer()
) -> binary().
encode_i_type(Opcode, Rd, Funct3, Rs1, Imm) ->
    RdNum = reg_to_num(Rd),
    Rs1Num = reg_to_num(Rs1),
    % Keep only the low 12 bits of the (possibly negative) immediate
    ImmMasked = Imm band 16#FFF,
    Instr =
        (ImmMasked bsl 20) bor
            (Rs1Num bsl 15) bor
            (Funct3 bsl 12) bor
            (RdNum bsl 7) bor
            Opcode,
    % Emit the 32-bit instruction word in little-endian byte order.
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% I-type immediate arithmetic and logical instructions
%%-----------------------------------------------------------------------------

%% ADDI - Add Immediate
%% rd = rs1 + imm
%% Raises {immediate_out_of_range, Imm, -2048, 2047} when imm does not fit
%% in a signed 12-bit field.
-spec addi(riscv_register(), riscv_register(), integer()) -> binary().
addi(Rd, Rs1, Imm) when Rd =:= Rs1, Rd =/= zero, Imm >= -32, Imm =< 31 ->
    % Use c.addi when rd == rs1, rd != zero, and imm fits in 6 bits (signed)
    c_addi(Rd, Imm);
addi(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 ->
    % Opcode: 0010011 (0x13), Funct3: 000
    encode_i_type(16#13, Rd, 16#0, Rs1, Imm);
addi(_Rd, _Rs1, Imm) ->
    error({immediate_out_of_range, Imm, -2048, 2047}).
%% ANDI - AND Immediate
%% rd = rs1 & imm
%% Raises {immediate_out_of_range, Imm, -2048, 2047} for an immediate that
%% does not fit in 12 signed bits.
-spec andi(riscv_register(), riscv_register(), integer()) -> binary().
andi(Reg, Reg, Imm) when -32 =< Imm, Imm =< 31 ->
    % In-place AND with a 6-bit signed immediate: c.andi is used only for
    % registers of the compressed set.
    case is_compressed_reg(Reg) of
        false -> encode_i_type(2#0010011, Reg, 2#111, Reg, Imm);
        true -> c_andi(Reg, Imm)
    end;
andi(Rd, Rs1, Imm) when -2048 =< Imm, Imm =< 2047 ->
    % opcode 0x13, funct3 111
    encode_i_type(2#0010011, Rd, 2#111, Rs1, Imm);
andi(_, _, Imm) ->
    error({immediate_out_of_range, Imm, -2048, 2047}).

%% ORI - OR Immediate
%% rd = rs1 | imm
-spec ori(riscv_register(), riscv_register(), integer()) -> binary().
ori(Rd, Rs1, Imm) when -2048 =< Imm, Imm =< 2047 ->
    % opcode 0x13, funct3 110
    encode_i_type(2#0010011, Rd, 2#110, Rs1, Imm);
ori(_, _, Imm) ->
    error({immediate_out_of_range, Imm, -2048, 2047}).

%% XORI - XOR Immediate
%% rd = rs1 ^ imm
-spec xori(riscv_register(), riscv_register(), integer()) -> binary().
xori(Rd, Rs1, Imm) when -2048 =< Imm, Imm =< 2047 ->
    % opcode 0x13, funct3 100
    encode_i_type(2#0010011, Rd, 2#100, Rs1, Imm);
xori(_, _, Imm) ->
    error({immediate_out_of_range, Imm, -2048, 2047}).

%% SLTI - Set Less Than Immediate
%% rd = (rs1 < imm) ? 1 : 0 (signed)
-spec slti(riscv_register(), riscv_register(), integer()) -> binary().
slti(Rd, Rs1, Imm) when -2048 =< Imm, Imm =< 2047 ->
    % opcode 0x13, funct3 010
    encode_i_type(2#0010011, Rd, 2#010, Rs1, Imm);
slti(_, _, Imm) ->
    error({immediate_out_of_range, Imm, -2048, 2047}).

%% SLTIU - Set Less Than Immediate Unsigned
%% rd = (rs1 < imm) ? 1 : 0 (unsigned)
-spec sltiu(riscv_register(), riscv_register(), integer()) -> binary().
sltiu(Rd, Rs1, Imm) when -2048 =< Imm, Imm =< 2047 ->
    % opcode 0x13, funct3 011
    encode_i_type(2#0010011, Rd, 2#011, Rs1, Imm);
sltiu(_, _, Imm) ->
    error({immediate_out_of_range, Imm, -2048, 2047}).
%%-----------------------------------------------------------------------------
%% I-type immediate shift instructions
%%-----------------------------------------------------------------------------

%% SLLI - Shift Left Logical Immediate
%% rd = rs1 << shamt
%% Raises {shift_amount_out_of_range, Shamt, 0, 31} for shamt outside 0..31.
-spec slli(riscv_register(), riscv_register(), 0..31) -> binary().
slli(Rd, Rs1, Shamt) when Rd =:= Rs1, Rd =/= zero, Shamt >= 1, Shamt =< 31 ->
    % Use c.slli when rd == rs1, rd != zero, and shamt != 0 (c.slli with shamt=0 is reserved)
    c_slli(Rd, Shamt);
slli(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 ->
    % Opcode: 0010011 (0x13), Funct3: 001, Imm[11:5] = 0000000
    encode_i_type(16#13, Rd, 16#1, Rs1, Shamt);
slli(_Rd, _Rs1, Shamt) ->
    error({shift_amount_out_of_range, Shamt, 0, 31}).

%% SRLI - Shift Right Logical Immediate
%% rd = rs1 >> shamt (zero-extend)
%% Raises {shift_amount_out_of_range, Shamt, 0, 31} for shamt outside 0..31.
-spec srli(riscv_register(), riscv_register(), 0..31) -> binary().
srli(Rd, Rs1, Shamt) when Rd =:= Rs1, Shamt >= 1, Shamt =< 31 ->
    % c.srli requires a non-zero shift amount (the shamt=0 code points are
    % HINTs), so shamt=0 falls through to the full-width encoding, as in slli.
    case is_compressed_reg(Rd) of
        true -> c_srli(Rd, Shamt);
        false -> encode_i_type(16#13, Rd, 16#5, Rs1, Shamt)
    end;
srli(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 ->
    % Opcode: 0010011 (0x13), Funct3: 101, Imm[11:5] = 0000000
    encode_i_type(16#13, Rd, 16#5, Rs1, Shamt);
srli(_Rd, _Rs1, Shamt) ->
    error({shift_amount_out_of_range, Shamt, 0, 31}).

%% SRAI - Shift Right Arithmetic Immediate
%% rd = rs1 >> shamt (sign-extend)
-spec srai(riscv_register(), riscv_register(), 0..31) -> binary().
srai(Rd, Rs1, Shamt) when Rd =:= Rs1, Shamt >= 1, Shamt =< 31 ->
    % c.srai requires a non-zero shift amount (the shamt=0 code points are
    % HINTs), so shamt=0 falls through to the full-width encoding below.
    case is_compressed_reg(Rd) of
        true ->
            c_srai(Rd, Shamt);
        false ->
            % Imm[10] (instruction bit 30) distinguishes SRAI from SRLI
            encode_i_type(16#13, Rd, 16#5, Rs1, Shamt bor (1 bsl 10))
    end;
srai(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 ->
    % Opcode: 0010011 (0x13), Funct3: 101, Imm[11:5] = 0100000
    % The encoding uses bit 30 (Imm[10]) to distinguish SRAI from SRLI
    encode_i_type(16#13, Rd, 16#5, Rs1, Shamt bor (1 bsl 10));
srai(_Rd, _Rs1, Shamt) ->
    error({shift_amount_out_of_range, Shamt, 0, 31}).

%%-----------------------------------------------------------------------------
%% Load instructions (I-type)
%%-----------------------------------------------------------------------------

%% LW - Load Word
%% rd = mem[rs1 + offset] (32-bit)
%% The second argument is either a base register (offset 0) or a
%% {Base, Offset} tuple.
-spec lw(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
lw(Rd, {Rs1, Offset}) ->
    lw(Rd, Rs1, Offset);
lw(Rd, Rs1) when is_atom(Rs1) ->
    lw(Rd, Rs1, 0).

%% Three-argument form: destination, base register, byte offset.
%% Raises {offset_out_of_range, Offset, -2048, 2047} when the offset does not
%% fit in a signed 12-bit field.
-spec lw(riscv_register(), riscv_register(), integer()) -> binary().
lw(Rd, sp, Offset) when Rd =/= zero, Offset >= 0, Offset =< 252, Offset rem 4 =:= 0 ->
    % Use c.lwsp for loads from sp with aligned offset in range
    c_lwsp(Rd, Offset);
lw(Rd, Rs1, Offset) when Offset >= 0, Offset =< 124, Offset rem 4 =:= 0 ->
    % Use c.lw when both registers are in compressed set and offset is aligned
    case is_compressed_reg(Rd) andalso is_compressed_reg(Rs1) of
        true -> c_lw(Rd, {Rs1, Offset});
        false -> encode_i_type(16#03, Rd, 16#2, Rs1, Offset)
    end;
lw(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0000011 (0x03), Funct3: 010
    encode_i_type(16#03, Rd, 16#2, Rs1, Offset);
lw(_Rd, _Rs1, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).
%% LH - Load Halfword (sign-extended)
%% rd = sign_extend(mem[rs1 + offset][15:0])
%% The second argument is either a base register (offset 0) or a
%% {Base, Offset} tuple.
-spec lh(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
lh(Rd, {Rs1, Offset}) ->
    lh(Rd, Rs1, Offset);
lh(Rd, Rs1) when is_atom(Rs1) ->
    lh(Rd, Rs1, 0).

%% Raises {offset_out_of_range, Offset, -2048, 2047} for an offset that does
%% not fit in a signed 12-bit field.
-spec lh(riscv_register(), riscv_register(), integer()) -> binary().
lh(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0000011 (0x03), Funct3: 001
    encode_i_type(16#03, Rd, 16#1, Rs1, Offset);
lh(_Rd, _Rs1, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% LHU - Load Halfword Unsigned (zero-extended)
%% rd = zero_extend(mem[rs1 + offset][15:0])
%% The second argument is either a base register (offset 0) or a
%% {Base, Offset} tuple.
-spec lhu(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
lhu(Rd, {Rs1, Offset}) ->
    lhu(Rd, Rs1, Offset);
lhu(Rd, Rs1) when is_atom(Rs1) ->
    lhu(Rd, Rs1, 0).

%% Raises {offset_out_of_range, Offset, -2048, 2047} for an offset that does
%% not fit in a signed 12-bit field.
-spec lhu(riscv_register(), riscv_register(), integer()) -> binary().
lhu(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0000011 (0x03), Funct3: 101
    encode_i_type(16#03, Rd, 16#5, Rs1, Offset);
lhu(_Rd, _Rs1, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% LB - Load Byte (sign-extended)
%% rd = sign_extend(mem[rs1 + offset][7:0])
%% The second argument is either a base register (offset 0) or a
%% {Base, Offset} tuple.
-spec lb(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
lb(Rd, {Rs1, Offset}) ->
    lb(Rd, Rs1, Offset);
lb(Rd, Rs1) when is_atom(Rs1) ->
    lb(Rd, Rs1, 0).

%% Raises {offset_out_of_range, Offset, -2048, 2047} for an offset that does
%% not fit in a signed 12-bit field.
-spec lb(riscv_register(), riscv_register(), integer()) -> binary().
lb(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0000011 (0x03), Funct3: 000
    encode_i_type(16#03, Rd, 16#0, Rs1, Offset);
lb(_Rd, _Rs1, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% LBU - Load Byte Unsigned (zero-extended)
%% rd = zero_extend(mem[rs1 + offset][7:0])
%% The second argument is either a base register (offset 0) or a
%% {Base, Offset} tuple.
-spec lbu(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
lbu(Rd, {Rs1, Offset}) ->
    lbu(Rd, Rs1, Offset);
lbu(Rd, Rs1) when is_atom(Rs1) ->
    lbu(Rd, Rs1, 0).

%% Raises {offset_out_of_range, Offset, -2048, 2047} for an offset that does
%% not fit in a signed 12-bit field.
-spec lbu(riscv_register(), riscv_register(), integer()) -> binary().
lbu(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0000011 (0x03), Funct3: 100
    encode_i_type(16#03, Rd, 16#4, Rs1, Offset);
lbu(_Rd, _Rs1, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%%-----------------------------------------------------------------------------
%% S-type instruction encoding (for stores)
%%-----------------------------------------------------------------------------

%% S-type instruction format:
%% imm[11:5] (7) | rs2 (5) | rs1 (5) | funct3 (3) | imm[4:0] (5) | opcode (7)
%% Bits: 31-25 24-20 19-15 14-12 11-7 6-0
%%
%% Rs1 is the base address register and Rs2 the register being stored. The
%% immediate is truncated to 12 bits; callers perform the range check.

-spec encode_s_type(
    Opcode :: 0..127,
    Funct3 :: 0..7,
    Rs1 :: riscv_register(),
    Rs2 :: riscv_register(),
    Imm :: integer()
) -> binary().
encode_s_type(Opcode, Funct3, Rs1, Rs2, Imm) ->
    Rs1Num = reg_to_num(Rs1),
    Rs2Num = reg_to_num(Rs2),
    % Split immediate: imm[11:5] goes to bits 31-25, imm[4:0] goes to bits 11-7
    ImmMasked = Imm band 16#FFF,
    Imm11_5 = (ImmMasked bsr 5) band 16#7F,
    Imm4_0 = ImmMasked band 16#1F,
    Instr =
        (Imm11_5 bsl 25) bor
            (Rs2Num bsl 20) bor
            (Rs1Num bsl 15) bor
            (Funct3 bsl 12) bor
            (Imm4_0 bsl 7) bor
            Opcode,
    % Emit the 32-bit instruction word in little-endian byte order.
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% Store instructions (S-type)
%%-----------------------------------------------------------------------------

%% SW - Store Word
%% mem[rs1 + offset] = rs2[31:0]
%% Two-argument form: sw(Rs2, Rs1) or sw(Rs2, {Rs1, Offset}), i.e. the value
%% register first, then the base register or a {Base, Offset} tuple.
-spec sw(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
sw(Rs2, {Rs1, Offset}) ->
    sw(Rs1, Rs2, Offset);
sw(Rs2, Rs1) when is_atom(Rs1) ->
    sw(Rs1, Rs2, 0).

%% Three-argument form: sw(Rs1, Rs2, Offset), base register first.
-spec sw(riscv_register(), riscv_register(), integer()) -> binary().
sw(sp, Rs2, Offset) when Offset >= 0, Offset =< 252, Offset rem 4 =:= 0 ->
    % Use c.swsp for stores to sp with aligned offset in range
    c_swsp(Rs2, Offset);
sw(Rs1, Rs2, Offset) when Offset >= 0, Offset =< 124, Offset rem 4 =:= 0 ->
    % Use c.sw when both registers are in compressed set and offset is aligned
    case is_compressed_reg(Rs1) andalso is_compressed_reg(Rs2) of
        true -> c_sw(Rs2, {Rs1, Offset});
        false -> encode_s_type(16#23, 16#2, Rs1, Rs2, Offset)
    end;
sw(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0100011 (0x23), Funct3: 010
    encode_s_type(16#23, 16#2, Rs1, Rs2, Offset);
sw(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% SH - Store Halfword
%% mem[rs1 + offset][15:0] = rs2[15:0]
%% Two-argument form: sh(Rs2, Rs1) or sh(Rs2, {Rs1, Offset}), i.e. the value
%% register first, then the base register or a {Base, Offset} tuple.
-spec sh(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
sh(Rs2, {Rs1, Offset}) ->
    sh(Rs1, Rs2, Offset);
sh(Rs2, Rs1) when is_atom(Rs1) ->
    sh(Rs1, Rs2, 0).

%% Three-argument form: sh(Rs1, Rs2, Offset), base register first.
%% Raises {offset_out_of_range, Offset, -2048, 2047} for an offset that does
%% not fit in a signed 12-bit field.
-spec sh(riscv_register(), riscv_register(), integer()) -> binary().
sh(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0100011 (0x23), Funct3: 001
    encode_s_type(16#23, 16#1, Rs1, Rs2, Offset);
sh(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% SB - Store Byte
%% mem[rs1 + offset][7:0] = rs2[7:0]
%% Two-argument form: sb(Rs2, Rs1) or sb(Rs2, {Rs1, Offset}), i.e. the value
%% register first, then the base register or a {Base, Offset} tuple.
-spec sb(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
sb(Rs2, {Rs1, Offset}) ->
    sb(Rs1, Rs2, Offset);
sb(Rs2, Rs1) when is_atom(Rs1) ->
    sb(Rs1, Rs2, 0).

%% Three-argument form: sb(Rs1, Rs2, Offset), base register first.
%% Raises {offset_out_of_range, Offset, -2048, 2047} for an offset that does
%% not fit in a signed 12-bit field.
-spec sb(riscv_register(), riscv_register(), integer()) -> binary().
sb(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0100011 (0x23), Funct3: 000
    encode_s_type(16#23, 16#0, Rs1, Rs2, Offset);
sb(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).
%%-----------------------------------------------------------------------------
%% B-type instruction encoding (for branches)
%%-----------------------------------------------------------------------------

%% B-type instruction format:
%% imm[12|10:5] (7) | rs2 (5) | rs1 (5) | funct3 (3) | imm[4:1|11] (5) | opcode (7)
%% Bits: 31-25 24-20 19-15 14-12 11-7 6-0
%%
%% The immediate is split across the instruction and represents a signed,
%% 2-byte-aligned byte offset (bit 0 is implicit and not encoded).
%% Range: -4096 to +4094 bytes (about ±4 KiB)
%%
%% Alignment and range are not checked here; the branch functions that call
%% this helper validate the offset first.

-spec encode_b_type(
    Opcode :: 0..127,
    Funct3 :: 0..7,
    Rs1 :: riscv_register(),
    Rs2 :: riscv_register(),
    Offset :: integer()
) -> binary().
encode_b_type(Opcode, Funct3, Rs1, Rs2, Offset) ->
    Rs1Num = reg_to_num(Rs1),
    Rs2Num = reg_to_num(Rs2),
    % Truncate to 13 bits (two's complement), then scatter the bits:
    % imm[12], imm[10:5], imm[4:1], imm[11]
    OffsetMasked = Offset band 16#1FFF,
    % imm[12] -> bit 31
    Imm12 = (OffsetMasked bsr 12) band 1,
    % imm[10:5] -> bits 30-25
    Imm10_5 = (OffsetMasked bsr 5) band 16#3F,
    % imm[4:1] -> bits 11-8
    Imm4_1 = (OffsetMasked bsr 1) band 16#F,
    % imm[11] -> bit 7
    Imm11 = (OffsetMasked bsr 11) band 1,
    Instr =
        (Imm12 bsl 31) bor
            (Imm10_5 bsl 25) bor
            (Rs2Num bsl 20) bor
            (Rs1Num bsl 15) bor
            (Funct3 bsl 12) bor
            (Imm4_1 bsl 8) bor
            (Imm11 bsl 7) bor
            Opcode,
    % Emit the 32-bit instruction word in little-endian byte order.
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% Branch instructions (B-type)
%%-----------------------------------------------------------------------------

%% BEQ - Branch if Equal
%% if (rs1 == rs2) pc += offset
-spec beq(riscv_register(), riscv_register(), integer()) -> binary().
beq(Reg, zero, Offset) when (Offset rem 2) =:= 0, Offset >= -256, Offset =< 254 ->
    % Comparison against zero with a short offset: c.beqz applies, but only
    % for registers of the compressed set.
    case is_compressed_reg(Reg) of
        false -> encode_b_type(2#1100011, 2#000, Reg, zero, Offset);
        true -> c_beqz(Reg, Offset)
    end;
beq(Rs1, Rs2, Offset) when (Offset rem 2) =:= 0, Offset >= -4096, Offset =< 4094 ->
    % opcode 0x63, funct3 000
    encode_b_type(2#1100011, 2#000, Rs1, Rs2, Offset);
beq(_, _, Offset) when (Offset rem 2) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
beq(_, _, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).

%% BNE - Branch if Not Equal
%% if (rs1 != rs2) pc += offset
%% Raises {offset_not_aligned, Offset, 2} for an odd offset and
%% {offset_out_of_range, Offset, -4096, 4094} for one that is too far.
-spec bne(riscv_register(), riscv_register(), integer()) -> binary().
bne(Reg, zero, Offset) when (Offset rem 2) =:= 0, Offset >= -256, Offset =< 254 ->
    % Comparison against zero with a short offset: c.bnez applies, but only
    % for registers of the compressed set.
    case is_compressed_reg(Reg) of
        false -> encode_b_type(2#1100011, 2#001, Reg, zero, Offset);
        true -> c_bnez(Reg, Offset)
    end;
bne(Rs1, Rs2, Offset) when (Offset rem 2) =:= 0, Offset >= -4096, Offset =< 4094 ->
    % opcode 0x63, funct3 001
    encode_b_type(2#1100011, 2#001, Rs1, Rs2, Offset);
bne(_, _, Offset) when (Offset rem 2) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
bne(_, _, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).

%% BLT - Branch if Less Than (signed)
%% if (rs1 < rs2) pc += offset
%% Raises {offset_not_aligned, Offset, 2} for an odd offset and
%% {offset_out_of_range, Offset, -4096, 4094} for one that is too far.
-spec blt(riscv_register(), riscv_register(), integer()) -> binary().
blt(Rs1, Rs2, Offset) when (Offset rem 2) =:= 0, Offset >= -4096, Offset =< 4094 ->
    % opcode 0x63, funct3 100
    encode_b_type(2#1100011, 2#100, Rs1, Rs2, Offset);
blt(_, _, Offset) when (Offset rem 2) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
blt(_, _, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).
%% BGE - Branch if Greater or Equal (signed)
%% if (rs1 >= rs2) pc += offset
%% Raises {offset_not_aligned, Offset, 2} for an odd offset and
%% {offset_out_of_range, Offset, -4096, 4094} for one that is too far.
-spec bge(riscv_register(), riscv_register(), integer()) -> binary().
bge(Rs1, Rs2, Offset) when (Offset rem 2) =:= 0, Offset >= -4096, Offset =< 4094 ->
    % opcode 0x63, funct3 101
    encode_b_type(2#1100011, 2#101, Rs1, Rs2, Offset);
bge(_, _, Offset) when (Offset rem 2) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
bge(_, _, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).

%% BLTU - Branch if Less Than Unsigned
%% if (rs1 < rs2) pc += offset (unsigned)
%% Raises {offset_not_aligned, Offset, 2} for an odd offset and
%% {offset_out_of_range, Offset, -4096, 4094} for one that is too far.
-spec bltu(riscv_register(), riscv_register(), integer()) -> binary().
bltu(Rs1, Rs2, Offset) when (Offset rem 2) =:= 0, Offset >= -4096, Offset =< 4094 ->
    % opcode 0x63, funct3 110
    encode_b_type(2#1100011, 2#110, Rs1, Rs2, Offset);
bltu(_, _, Offset) when (Offset rem 2) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
bltu(_, _, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).

%% BGEU - Branch if Greater or Equal Unsigned
%% if (rs1 >= rs2) pc += offset (unsigned)
%% Raises {offset_not_aligned, Offset, 2} for an odd offset and
%% {offset_out_of_range, Offset, -4096, 4094} for one that is too far.
-spec bgeu(riscv_register(), riscv_register(), integer()) -> binary().
bgeu(Rs1, Rs2, Offset) when (Offset rem 2) =:= 0, Offset >= -4096, Offset =< 4094 ->
    % opcode 0x63, funct3 111
    encode_b_type(2#1100011, 2#111, Rs1, Rs2, Offset);
bgeu(_, _, Offset) when (Offset rem 2) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
bgeu(_, _, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).

%%-----------------------------------------------------------------------------
%% J-type instruction encoding (for JAL)
%%-----------------------------------------------------------------------------

%% J-type instruction format (JAL):
%% imm[20|10:1|11|19:12] (20) | rd (5) | opcode (7)
%% Bits: 31-12 11-7 6-0
%%
%% The immediate represents a signed offset in multiples of 2 bytes.
%% Range: -1048576 to +1048574 bytes (about ±1 MiB)

%% Alignment and range are not checked here; jal/2 validates the offset
%% before calling this helper.
-spec encode_j_type(
    Opcode :: 0..127, Rd :: riscv_register(), Offset :: integer()
) -> binary().
encode_j_type(Opcode, Rd, Offset) ->
    RdNum = reg_to_num(Rd),
    % Truncate to 21 bits (two's complement), then scatter the bits:
    % imm[20], imm[10:1], imm[11], imm[19:12]
    OffsetMasked = Offset band 16#1FFFFF,
    % imm[20] -> bit 31
    Imm20 = (OffsetMasked bsr 20) band 1,
    % imm[10:1] -> bits 30-21
    Imm10_1 = (OffsetMasked bsr 1) band 16#3FF,
    % imm[11] -> bit 20
    Imm11 = (OffsetMasked bsr 11) band 1,
    % imm[19:12] -> bits 19-12
    Imm19_12 = (OffsetMasked bsr 12) band 16#FF,
    Instr =
        (Imm20 bsl 31) bor
            (Imm10_1 bsl 21) bor
            (Imm11 bsl 20) bor
            (Imm19_12 bsl 12) bor
            (RdNum bsl 7) bor
            Opcode,
    % Emit the 32-bit instruction word in little-endian byte order.
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% U-type instruction encoding (for LUI, AUIPC)
%%-----------------------------------------------------------------------------

%% U-type instruction format:
%% imm[31:12] (20) | rd (5) | opcode (7)
%% Bits: 31-12 11-7 6-0
%%
%% Imm is the full value whose bits 31-12 are placed in the instruction;
%% its low 12 bits are discarded.

-spec encode_u_type(
    Opcode :: 0..127, Rd :: riscv_register(), Imm :: integer()
) -> binary().
encode_u_type(Opcode, Rd, Imm) ->
    RdNum = reg_to_num(Rd),
    % Upper 20 bits of immediate
    ImmUpper = (Imm bsr 12) band 16#FFFFF,
    Instr = (ImmUpper bsl 12) bor (RdNum bsl 7) bor Opcode,
    % Emit the 32-bit instruction word in little-endian byte order.
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% Jump and link instructions
%%-----------------------------------------------------------------------------

%% JAL - Jump and Link
%% rd = pc + 4; pc += offset
-spec jal(riscv_register(), integer()) -> binary().
jal(zero, Offset) when (Offset rem 2) =:= 0, Offset >= -2048, Offset =< 2046 ->
    % No link register and a short offset: c.j
    c_j(Offset);
jal(ra, Offset) when (Offset rem 2) =:= 0, Offset >= -2048, Offset =< 2046 ->
    % Link in ra and a short offset: c.jal (RV32C only)
    c_jal(Offset);
jal(Rd, Offset) when (Offset rem 2) =:= 0, Offset >= -1048576, Offset =< 1048574 ->
    % opcode 0x6F
    encode_j_type(2#1101111, Rd, Offset);
jal(_, Offset) when (Offset rem 2) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
jal(_, Offset) ->
    error({offset_out_of_range, Offset, -1048576, 1048574}).

%% JALR - Jump and Link Register
%% rd = pc + 4; pc = (rs1 + offset) & ~1
%% Raises {offset_out_of_range, Offset, -2048, 2047} when the offset does not
%% fit in 12 signed bits.
-spec jalr(riscv_register(), riscv_register(), integer()) -> binary().
jalr(zero, Target, 0) when Target =/= zero ->
    % rd = zero, offset = 0: plain register jump, c.jr
    c_jr(Target);
jalr(ra, Target, 0) when Target =/= zero ->
    % rd = ra, offset = 0: register call, c.jalr
    c_jalr(Target);
jalr(Rd, Rs1, Offset) when -2048 =< Offset, Offset =< 2047 ->
    % opcode 0x67, funct3 000
    encode_i_type(2#1100111, Rd, 2#000, Rs1, Offset);
jalr(_, _, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% JALR - Jump and Link Register, zero-offset shorthand
%% rd = pc + 4; pc = rs1 & ~1
-spec jalr(riscv_register(), riscv_register()) -> binary().
jalr(Rd, Rs1) ->
    jalr(Rd, Rs1, 0).

%%-----------------------------------------------------------------------------
%% Upper immediate instructions
%%-----------------------------------------------------------------------------

%% LUI - Load Upper Immediate
%% rd = imm << 12
-spec lui(riscv_register(), integer()) -> binary().
lui(Rd, Imm) when Rd =/= zero, Rd =/= sp, Imm >= -32, Imm =< 31, Imm =/= 0 ->
    % Use c.lui when imm is a non-zero 6-bit signed value and rd is neither
    % zero nor sp: with rd = x2 the same encoding means c.addi16sp, so sp
    % must take the full-width LUI below.
    c_lui(Rd, Imm);
lui(Rd, Imm) when Imm >= -16#80000, Imm =< 16#7FFFF ->
    % Opcode: 0110111 (0x37)
    % Imm is the 20-bit upper value; shift it into bits 31-12 for the encoder.
    encode_u_type(16#37, Rd, Imm bsl 12);
lui(_Rd, Imm) ->
    error({immediate_out_of_range, Imm, -16#80000, 16#7FFFF}).

%% AUIPC - Add Upper Immediate to PC
%% rd = pc + (imm << 12)
%% Imm is the signed 20-bit upper value; raises
%% {immediate_out_of_range, Imm, -16#80000, 16#7FFFF} when it does not fit.
-spec auipc(riscv_register(), integer()) -> binary().
auipc(Rd, Imm) when Imm >= -16#80000, Imm =< 16#7FFFF ->
    % Opcode: 0010111 (0x17)
    encode_u_type(16#17, Rd, Imm bsl 12);
auipc(_Rd, Imm) ->
    error({immediate_out_of_range, Imm, -16#80000, 16#7FFFF}).

%%-----------------------------------------------------------------------------
%% Pseudo-instructions
%%-----------------------------------------------------------------------------
%% These are convenience instructions that map to actual RV32I instructions

%% NOP - No Operation
%% Expands to: addi x0, x0, 0
-spec nop() -> binary().
nop() ->
    addi(zero, zero, 0).

%% LI - Load Immediate
%% Load a 32-bit immediate value into a register
%% For small immediates (-2048 to 2047): addi rd, x0, imm
%% For larger immediates: lui + addi sequence
-spec li(riscv_register(), integer()) -> binary().
li(Rd, Imm) when Rd =/= zero, Imm >= -32, Imm =< 31 ->
    % Use c.li when rd != zero and imm fits in 6 bits (signed)
    c_li(Rd, Imm);
li(Rd, Imm) when Imm >= -2048, Imm =< 2047 ->
    % Small immediate: addi rd, x0, imm
    addi(Rd, zero, Imm);
% Handle unsigned values that represent small signed values (e.g., 0xFFFFFFFF = -1).
% The upper bound is required: without it, values >= 2^32 would be silently
% truncated (e.g. 16#100000005 loaded as 5) instead of being rejected.
li(Rd, Imm) when Imm > 16#7FFFFFFF, Imm =< 16#FFFFFFFF, Imm - 16#100000000 >= -2048 ->
    % This unsigned value fits in 12-bit signed range when normalized
    addi(Rd, zero, Imm - 16#100000000);
li(Rd, Imm) when Imm >= -16#80000000, Imm =< 16#FFFFFFFF ->
    % Large immediate: lui, or lui + addi.
    % Work on the unsigned 32-bit pattern so the shifts below never see a
    % negative number.
    UnsignedImm = Imm band 16#FFFFFFFF,
    Lower = UnsignedImm band 16#FFF,
    % addi sign-extends its 12-bit immediate, so the low part is a value in
    % [-2048, 2047] ...
    LowerSigned =
        if
            Lower >= 16#800 -> Lower - 16#1000;
            true -> Lower
        end,
    % ... and the upper part gets +1 whenever the low part is negative
    % (adding 16#800 before shifting carries exactly in that case).
    UpperMasked = ((UnsignedImm + 16#800) bsr 12) band 16#FFFFF,
    % lui/2 takes a signed 20-bit value: sign-extend from bit 19.
    Upper =
        if
            UpperMasked >= 16#80000 -> UpperMasked - 16#100000;
            true -> UpperMasked
        end,
    LuiInstr = lui(Rd, Upper),
    case LowerSigned of
        0 ->
            % Just lui is sufficient when lower 12 bits are zero
            LuiInstr;
        _ ->
            AddiInstr = addi(Rd, Rd, LowerSigned),
            <<LuiInstr/binary, AddiInstr/binary>>
    end;
li(_Rd, Imm) ->
    error({immediate_out_of_range, Imm, -16#80000000, 16#FFFFFFFF}).
%% MV - Move (copy register)
%% Expands to: addi rd, rs, 0 or c.mv rd, rs
-spec mv(riscv_register(), riscv_register()) -> binary().
mv(Rd, Rs) when Rd =/= zero, Rs =/= zero ->
    % Use c.mv when both rd and rs are not zero
    c_mv(Rd, Rs);
mv(Rd, Rs) ->
    addi(Rd, Rs, 0).

%% NOT - Bitwise NOT
%% Expands to: xori rd, rs, -1
-spec not_(riscv_register(), riscv_register()) -> binary().
not_(Rd, Rs) ->
    xori(Rd, Rs, -1).

%% NEG - Negate (two's complement)
%% Expands to: sub rd, x0, rs
-spec neg(riscv_register(), riscv_register()) -> binary().
neg(Rd, Rs) ->
    sub(Rd, zero, Rs).

%% J - Unconditional Jump
%% Expands to: jal x0, offset
-spec j(integer()) -> binary().
j(Offset) ->
    jal(zero, Offset).

%% JR - Jump Register
%% Expands to: jalr x0, rs, 0
-spec jr(riscv_register()) -> binary().
jr(Rs) ->
    jalr(zero, Rs, 0).

%% RET - Return from subroutine
%% Expands to: jalr x0, ra, 0
-spec ret() -> binary().
ret() ->
    jalr(zero, ra, 0).

%% CALL - Call function (far call using AUIPC + JALR)
%% This is a two-instruction sequence for calling functions beyond JAL range.
%% Rd is the scratch register that receives the upper part of the address;
%% the return address is always written to ra.
%% Expands to: auipc rd, hi20(offset); jalr ra, rd, lo12(offset)
%% where hi20/lo12 are the carry-adjusted upper 20 and signed lower 12 bits.
%% Raises {offset_out_of_range, Offset, -16#80000000, 16#7FFFFFFF} when the
%% offset does not fit in 32 signed bits.
-spec call(riscv_register(), integer()) -> binary().
call(Rd, Offset) when Offset >= -16#80000000, Offset =< 16#7FFFFFFF ->
    % Split offset into upper 20 bits and lower 12 bits
    Lower = Offset band 16#FFF,
    % jalr sign-extends its 12-bit immediate
    LowerSigned =
        if
            Lower >= 16#800 -> Lower - 16#1000;
            true -> Lower
        end,
    % Removing the signed low part leaves a multiple of 4096; this adds the
    % +1 carry to the upper part whenever the low part is negative.
    UpperMasked = ((Offset - LowerSigned) bsr 12) band 16#FFFFF,
    % auipc/2 accepts a signed 20-bit value, so sign-extend from bit 19.
    % Without this, any upper part with bit 19 set (e.g. offset -4096)
    % is rejected by auipc as out of range.
    Upper =
        if
            UpperMasked >= 16#80000 -> UpperMasked - 16#100000;
            true -> UpperMasked
        end,
    AuipcInstr = auipc(Rd, Upper),
    JalrInstr = jalr(ra, Rd, LowerSigned),
    <<AuipcInstr/binary, JalrInstr/binary>>;
call(_Rd, Offset) ->
    error({offset_out_of_range, Offset, -16#80000000, 16#7FFFFFFF}).
+
+%% MUL - Multiply (RV32M extension)
+%% rd receives the lower 32 bits of rs1 * rs2
+%% Format: mul rd, rs1, rs2
+%% Encoding: R-type with opcode=0x33, funct3=0x0, funct7=0x01
+-spec mul(riscv_register(), riscv_register(), riscv_register()) -> binary().
+mul(Rd, Rs1, Rs2) ->
+    Opcode = 2#0110011,
+    Funct3 = 2#000,
+    Funct7 = 2#0000001,
+    encode_r_type(Opcode, Rd, Funct3, Rs1, Rs2, Funct7).
+
+%%-----------------------------------------------------------------------------
+%% C Extension (RV32C) - Compressed Instructions
+%%-----------------------------------------------------------------------------
+%% Compressed instructions are 16 bits (2 bytes) wide and exist to reduce
+%% code size. Their encoding formats differ from the base 32-bit formats.
+%%
+%% Registers are encoded in one of two ways, depending on the instruction:
+%% - full 5-bit encoding, any of x0-x31
+%% - 3-bit encoding, restricted to x8-x15 (s0, s1, a0-a5), known as the
+%%   "compressed register set" or "C register set"
+%%
+%% Instruction formats (most significant field first):
+%% - CR  (Register):       funct4 | rd/rs1 | rs2 | op
+%% - CI  (Immediate):      funct3 | imm | rd/rs1 | imm | op
+%% - CSS (Stack Store):    funct3 | imm | rs2 | op
+%% - CIW (Wide Immediate): funct3 | imm | rd' | op
+%% - CL  (Load):           funct3 | imm | rs1' | imm | rd' | op
+%% - CS  (Store):          funct3 | imm | rs1' | imm | rs2' | op
+%% - CA  (Arithmetic):     funct6 | rd'/rs1' | funct2 | rs2' | op
+%% - CB  (Branch):         funct3 | offset | rs1' | offset | op
+%% - CJ  (Jump):           funct3 | jump target | op
+%%
+%% See: RISC-V Instruction Set Manual, Volume I, Chapter 16
+%%-----------------------------------------------------------------------------
+
+%% Map a register of the compressed set to its 3-bit number
+%% (0-7 stands for x8-x15)
+-spec reg_to_c_num(riscv_register()) -> 0..7.
+reg_to_c_num(s0) -> 0;
+reg_to_c_num(fp) -> 0;
+reg_to_c_num(s1) -> 1;
+reg_to_c_num(a0) -> 2;
+reg_to_c_num(a1) -> 3;
+reg_to_c_num(a2) -> 4;
+reg_to_c_num(a3) -> 5;
+reg_to_c_num(a4) -> 6;
+reg_to_c_num(a5) -> 7;
+% Any other register cannot be expressed with the 3-bit encoding
+reg_to_c_num(Reg) -> error({register_not_in_compressed_set, Reg, 's0/fp, s1, a0-a5'}).
+
+%% Check if a register is in the compressed register set (s0/fp, s1, a0-a5)
+%% Returns true exactly for the registers accepted by reg_to_c_num/1.
+-spec is_compressed_reg(riscv_register()) -> boolean().
+is_compressed_reg(s0) -> true;
+is_compressed_reg(fp) -> true;
+is_compressed_reg(s1) -> true;
+is_compressed_reg(a0) -> true;
+is_compressed_reg(a1) -> true;
+is_compressed_reg(a2) -> true;
+is_compressed_reg(a3) -> true;
+is_compressed_reg(a4) -> true;
+is_compressed_reg(a5) -> true;
+is_compressed_reg(_) -> false.
+
+%%-----------------------------------------------------------------------------
+%% CR-type instruction encoding (Compressed Register format)
+%%-----------------------------------------------------------------------------
+%% CR format: funct4 (4) | rd/rs1 (5) | rs2 (5) | op (2)
+%% Bits:      15-12        11-7         6-2       1-0
+%%
+%% Both registers use the full 5-bit numbering (reg_to_num/1).
+%% Returns the 16-bit instruction as a 2-byte little-endian binary.
+
+-spec encode_cr_type(
+    Opcode :: 0..3,
+    Rd :: riscv_register(),
+    Rs2 :: riscv_register(),
+    Funct4 :: 0..15
+) -> binary().
+encode_cr_type(Opcode, Rd, Rs2, Funct4) ->
+    RdNum = reg_to_num(Rd),
+    Rs2Num = reg_to_num(Rs2),
+    Instr =
+        (Funct4 bsl 12) bor
+            (RdNum bsl 7) bor
+            (Rs2Num bsl 2) bor
+            Opcode,
+    <<Instr:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CI-type instruction encoding (Compressed Immediate format)
+%%-----------------------------------------------------------------------------
+%% CI format: funct3 (3) | imm[5] (1) | rd/rs1 (5) | imm[4:0] (5) | op (2)
+%% Bits:      15-13        12           11-7         6-2            1-0
+
+-spec encode_ci_type(
+    Opcode :: 0..3,
+    Rd :: riscv_register(),
+    Imm :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_ci_type(Opcode, Rd, Imm, Funct3) ->
+    RdNum = reg_to_num(Rd),
+    % Only the low 6 bits of Imm are encoded; for a negative Imm this is its
+    % two's complement representation. Range checks are left to the callers.
+    ImmMasked = Imm band 16#3F,
+    Imm5 = (ImmMasked bsr 5) band 1,
+    Imm4_0 = ImmMasked band 16#1F,
+    Instr =
+        (Funct3 bsl 13) bor
+            (Imm5 bsl 12) bor
+            (RdNum bsl 7) bor
+            (Imm4_0 bsl 2) bor
+            Opcode,
+    <<Instr:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CSS-type instruction encoding (Compressed Stack Store format)
+%%-----------------------------------------------------------------------------
+%% CSS format: funct3 (3) | imm[5:0] (6) | rs2 (5) | op (2)
+%% Bits:       15-13        12-7           6-2       1-0
+%%
+%% Imm is the already-permuted 6-bit field, placed verbatim at bits 12-7.
+%% Returns the 16-bit instruction as a 2-byte little-endian binary.
+
+-spec encode_css_type(
+    Opcode :: 0..3,
+    Rs2 :: riscv_register(),
+    Imm :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_css_type(Opcode, Rs2, Imm, Funct3) ->
+    Rs2Num = reg_to_num(Rs2),
+    % Keep the 6 bits that fit the immediate field
+    ImmMasked = Imm band 16#3F,
+    Instr =
+        (Funct3 bsl 13) bor
+            (ImmMasked bsl 7) bor
+            (Rs2Num bsl 2) bor
+            Opcode,
+    <<Instr:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CIW-type instruction encoding (Compressed Wide Immediate format)
+%%-----------------------------------------------------------------------------
+%% CIW format: funct3 (3) | imm[7:0] (8) | rd' (3) | op (2)
+%% Bits:       15-13        12-5           4-2       1-0
+%%
+%% Rd must belong to the compressed register set (reg_to_c_num/1 raises
+%% otherwise). Imm is the already-permuted 8-bit field.
+%% Returns the 16-bit instruction as a 2-byte little-endian binary.
+
+-spec encode_ciw_type(
+    Opcode :: 0..3,
+    Rd :: riscv_register(),
+    Imm :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_ciw_type(Opcode, Rd, Imm, Funct3) ->
+    RdNum = reg_to_c_num(Rd),
+    ImmMasked = Imm band 16#FF,
+    Instr =
+        (Funct3 bsl 13) bor
+            (ImmMasked bsl 5) bor
+            (RdNum bsl 2) bor
+            Opcode,
+    <<Instr:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CL-type instruction encoding (Compressed Load format)
+%%-----------------------------------------------------------------------------
+%% CL format: funct3 (3) | imm (3) | rs1' (3) | imm (2) | rd' (3) | op (2)
+%% Bits:      15-13        12-10     9-7        6-5       4-2       1-0
+%%
+%% Imm is the byte offset; its bits are scattered using the word-access
+%% layout: imm[5:3] -> bits 12-10, imm[2] -> bit 6, imm[6] -> bit 5.
+%% Both registers must belong to the compressed register set.
+%% Returns the 16-bit instruction as a 2-byte little-endian binary.
+
+-spec encode_cl_type(
+    Opcode :: 0..3,
+    Rd :: riscv_register(),
+    Rs1 :: riscv_register(),
+    Imm :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_cl_type(Opcode, Rd, Rs1, Imm, Funct3) ->
+    RdNum = reg_to_c_num(Rd),
+    Rs1Num = reg_to_c_num(Rs1),
+    % Bits 1:0 of Imm are not encoded; callers check alignment and range
+    ImmMasked = Imm band 16#7F,
+    Imm5_3 = (ImmMasked bsr 3) band 7,
+    Imm2 = (ImmMasked bsr 2) band 1,
+    Imm6 = (ImmMasked bsr 6) band 1,
+    Instr =
+        (Funct3 bsl 13) bor
+            (Imm5_3 bsl 10) bor
+            (Rs1Num bsl 7) bor
+            (Imm2 bsl 6) bor
+            (Imm6 bsl 5) bor
+            (RdNum bsl 2) bor
+            Opcode,
+    <<Instr:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CS-type instruction encoding (Compressed Store format)
+%%-----------------------------------------------------------------------------
+%% CS format: funct3 (3) | imm (3) | rs1' (3) | imm (2) | rs2' (3) | op (2)
+%% Bits:      15-13        12-10     9-7        6-5       4-2        1-0
+%%
+%% Same immediate layout as the CL format: imm[5:3] -> bits 12-10,
+%% imm[2] -> bit 6, imm[6] -> bit 5.
+%% Both registers must belong to the compressed register set.
+%% Returns the 16-bit instruction as a 2-byte little-endian binary.
+
+-spec encode_cs_type(
+    Opcode :: 0..3,
+    Rs1 :: riscv_register(),
+    Rs2 :: riscv_register(),
+    Imm :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_cs_type(Opcode, Rs1, Rs2, Imm, Funct3) ->
+    Rs1Num = reg_to_c_num(Rs1),
+    Rs2Num = reg_to_c_num(Rs2),
+    % Bits 1:0 of Imm are not encoded; callers check alignment and range
+    ImmMasked = Imm band 16#7F,
+    Imm5_3 = (ImmMasked bsr 3) band 7,
+    Imm2 = (ImmMasked bsr 2) band 1,
+    Imm6 = (ImmMasked bsr 6) band 1,
+    Instr =
+        (Funct3 bsl 13) bor
+            (Imm5_3 bsl 10) bor
+            (Rs1Num bsl 7) bor
+            (Imm2 bsl 6) bor
+            (Imm6 bsl 5) bor
+            (Rs2Num bsl 2) bor
+            Opcode,
+    <<Instr:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CA-type instruction encoding (Compressed Arithmetic format)
+%%-----------------------------------------------------------------------------
+%% CA format: funct6 (6) | rd'/rs1' (3) | funct2 (2) | rs2' (3) | op (2)
+%% Bits:      15-10        9-7            6-5          4-2        1-0
+%%
+%% Both registers must belong to the compressed register set.
+%% Returns the 16-bit instruction as a 2-byte little-endian binary.
+
+-spec encode_ca_type(
+    Opcode :: 0..3,
+    Rd :: riscv_register(),
+    Rs2 :: riscv_register(),
+    Funct2 :: 0..3,
+    Funct6 :: 0..63
+) -> binary().
+encode_ca_type(Opcode, Rd, Rs2, Funct2, Funct6) ->
+    RdNum = reg_to_c_num(Rd),
+    Rs2Num = reg_to_c_num(Rs2),
+    Instr =
+        (Funct6 bsl 10) bor
+            (RdNum bsl 7) bor
+            (Funct2 bsl 5) bor
+            (Rs2Num bsl 2) bor
+            Opcode,
+    <<Instr:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CB-type instruction encoding (Compressed Branch format)
+%%-----------------------------------------------------------------------------
+%% CB format: funct3 (3) | offset (3) | rs1' (3) | offset (5) | op (2)
+%% Bits:      15-13        12-10        9-7        6-2          1-0
+%% Offset encoding: offset[8|4:3|7:6|2:1|5] -> bits [12|11:10|6:5|4:3|2]
+%%
+%% Offset is the byte offset; bit 0 is not encoded. Rs1 must belong to the
+%% compressed register set.
+%% Returns the 16-bit instruction as a 2-byte little-endian binary.
+
+-spec encode_cb_type(
+    Opcode :: 0..3,
+    Rs1 :: riscv_register(),
+    Offset :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_cb_type(Opcode, Rs1, Offset, Funct3) ->
+    Rs1Num = reg_to_c_num(Rs1),
+    % Keep the 9-bit two's complement representation of the offset, then
+    % scatter: offset[8|4:3|7:6|2:1|5] -> bits [12|11:10|6:5|4:3|2]
+    OffsetMasked = Offset band 16#1FF,
+    Offset8 = (OffsetMasked bsr 8) band 1,
+    Offset4_3 = (OffsetMasked bsr 3) band 3,
+    Offset7_6 = (OffsetMasked bsr 6) band 3,
+    Offset2_1 = (OffsetMasked bsr 1) band 3,
+    Offset5 = (OffsetMasked bsr 5) band 1,
+    Instr =
+        (Funct3 bsl 13) bor
+            (Offset8 bsl 12) bor
+            (Offset4_3 bsl 10) bor
+            (Rs1Num bsl 7) bor
+            (Offset7_6 bsl 5) bor
+            (Offset2_1 bsl 3) bor
+            (Offset5 bsl 2) bor
+            Opcode,
+    <<Instr:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CJ-type instruction encoding (Compressed Jump format)
+%%-----------------------------------------------------------------------------
+%% CJ format: funct3 (3) | jump target (11) | op (2)
+%% Bits:      15-13        12-2               1-0
+%% Target encoding: target[11|4|9:8|10|6|7|3:1|5] -> bits [12|11|10:9|8|7|6|5:3|2]
+%%
+%% Offset is the byte offset; bit 0 is not encoded.
+%% Returns the 16-bit instruction as a 2-byte little-endian binary.
+
+-spec encode_cj_type(Opcode :: 0..3, Offset :: integer(), Funct3 :: 0..7) -> binary().
+encode_cj_type(Opcode, Offset, Funct3) ->
+    % Keep the 12-bit two's complement representation of the offset, then
+    % pick the individual bits: offset[11|4|9:8|10|6|7|3:1|5]
+    OffsetMasked = Offset band 16#FFF,
+    Offset11 = (OffsetMasked bsr 11) band 1,
+    Offset4 = (OffsetMasked bsr 4) band 1,
+    Offset9_8 = (OffsetMasked bsr 8) band 3,
+    Offset10 = (OffsetMasked bsr 10) band 1,
+    Offset6 = (OffsetMasked bsr 6) band 1,
+    Offset7 = (OffsetMasked bsr 7) band 1,
+    Offset3_1 = (OffsetMasked bsr 1) band 7,
+    Offset5 = (OffsetMasked bsr 5) band 1,
+    % Assemble the 11-bit target field, most significant bit first
+    OffsetBits =
+        (Offset11 bsl 10) bor
+            (Offset4 bsl 9) bor
+            (Offset9_8 bsl 7) bor
+            (Offset10 bsl 6) bor
+            (Offset6 bsl 5) bor
+            (Offset7 bsl 4) bor
+            (Offset3_1 bsl 1) bor
+            Offset5,
+    Instr =
+        (Funct3 bsl 13) bor
+            (OffsetBits bsl 2) bor
+            Opcode,
+    <<Instr:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Arithmetic and Logical Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.ADD - Compressed Add
+%% rd = rd + rs2 (both rd and rs2 are full 5-bit registers)
+%% Format: CR-type
+%% Encoding: funct4=1001 (0x9), op=10 (0x2)
+%% NOTE(review): no guard on Rs2 here. With Rs2 = zero the result has the same
+%% bits as c_jalr(Rd) (same funct4 and op); callers appear to be responsible
+%% for never passing zero - confirm.
+-spec c_add(riscv_register(), riscv_register()) -> binary().
+c_add(Rd, Rs2) ->
+    encode_cr_type(16#2, Rd, Rs2, 16#9).
+
+%% C.MV - Compressed Move (copy register)
+%% rd = rs2 (both are full 5-bit registers)
+%% Format: CR-type
+%% Encoding: funct4=1000 (0x8), op=10 (0x2)
+%% NOTE(review): no guard on Rs2 here. With Rs2 = zero the result has the same
+%% bits as c_jr(Rd) (same funct4 and op); mv/2 only calls this with non-zero
+%% registers.
+-spec c_mv(riscv_register(), riscv_register()) -> binary().
+c_mv(Rd, Rs2) ->
+    encode_cr_type(16#2, Rd, Rs2, 16#8).
+
+%% C.SUB - Compressed Subtract
+%% rd' = rd' - rs2' (3-bit compressed register encoding for both operands)
+%% Format: CA-type
+%% Encoding: funct6=100011 (0x23), funct2=00, op=01 (0x1)
+-spec c_sub(riscv_register(), riscv_register()) -> binary().
+c_sub(Rd, Rs2) ->
+    encode_ca_type(2#01, Rd, Rs2, 2#00, 2#100011).
+
+%% C.AND - Compressed Bitwise AND
+%% rd' = rd' & rs2'
+%% Format: CA-type
+%% Encoding: funct6=100011 (0x23), funct2=11, op=01 (0x1)
+-spec c_and(riscv_register(), riscv_register()) -> binary().
+c_and(Rd, Rs2) ->
+    encode_ca_type(2#01, Rd, Rs2, 2#11, 2#100011).
+
+%% C.OR - Compressed Bitwise OR
+%% rd' = rd' | rs2'
+%% Format: CA-type
+%% Encoding: funct6=100011 (0x23), funct2=10, op=01 (0x1)
+-spec c_or(riscv_register(), riscv_register()) -> binary().
+c_or(Rd, Rs2) ->
+    encode_ca_type(2#01, Rd, Rs2, 2#10, 2#100011).
+
+%% C.XOR - Compressed Bitwise XOR
+%% rd' = rd' ^ rs2'
+%% Format: CA-type
+%% Encoding: funct6=100011 (0x23), funct2=01, op=01 (0x1)
+-spec c_xor(riscv_register(), riscv_register()) -> binary().
+c_xor(Rd, Rs2) ->
+    encode_ca_type(2#01, Rd, Rs2, 2#01, 2#100011).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Immediate Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.ADDI - Compressed Add Immediate
+%% rd = rd + imm (full 5-bit register, 6-bit signed immediate)
+%% Format: CI-type
+%% Encoding: funct3=000, op=01 (0x1)
+%% The zero register is rejected before the immediate range is looked at.
+-spec c_addi(riscv_register(), integer()) -> binary().
+c_addi(zero, _Imm) ->
+    error({invalid_compressed_instruction, c_addi, 'rd cannot be zero'});
+c_addi(Rd, Imm) when Imm >= -32 andalso Imm =< 31 ->
+    encode_ci_type(2#01, Rd, Imm, 2#000);
+c_addi(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -32, 31}).
+
+%% C.ANDI - Compressed AND Immediate
+%% rd' = rd' & imm (rd' uses 3-bit encoding, imm is 6-bit signed)
+%% Format: CB-type (with special encoding)
+%% Encoding: funct3=100, imm[5]=bit12, funct2=10, imm[4:0]=bits 6:2, op=01
+%% Raises {immediate_out_of_range, Imm, -32, 31} for an immediate outside
+%% the 6-bit signed range.
+-spec c_andi(riscv_register(), integer()) -> binary().
+c_andi(Rd, Imm) when Imm >= -32, Imm =< 31 ->
+    RdNum = reg_to_c_num(Rd),
+    % 6-bit two's complement representation of the immediate
+    ImmMasked = Imm band 16#3F,
+    Imm5 = (ImmMasked bsr 5) band 1,
+    Imm4_0 = ImmMasked band 16#1F,
+    Instr =
+        (16#4 bsl 13) bor
+            (Imm5 bsl 12) bor
+            (16#2 bsl 10) bor
+            (RdNum bsl 7) bor
+            (Imm4_0 bsl 2) bor
+            16#1,
+    <<Instr:16/little>>;
+c_andi(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -32, 31}).
+
+%% C.LI - Compressed Load Immediate
+%% rd = imm (rd is full 5-bit register, imm is 6-bit signed)
+%% Format: CI-type
+%% Encoding: funct3=010, op=01 (0x1)
+%% Raises {immediate_out_of_range, Imm, -32, 31} for an immediate outside
+%% the 6-bit signed range.
+-spec c_li(riscv_register(), integer()) -> binary().
+c_li(Rd, Imm) when Imm >= -32, Imm =< 31 ->
+    encode_ci_type(16#1, Rd, Imm, 16#2);
+c_li(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -32, 31}).
+
+%% C.LUI - Compressed Load Upper Immediate
+%% rd = imm << 12 (rd is full 5-bit register, imm is 6-bit signed non-zero)
+%% Format: CI-type
+%% Encoding: funct3=011, op=01 (0x1)
+%% rd = sp is rejected because this module encodes c.addi16sp with the same
+%% funct3/op and rd = sp.
+%% Errors are checked in this order: register, zero immediate, range.
+-spec c_lui(riscv_register(), integer()) -> binary().
+c_lui(Rd, Imm) when Imm >= -32, Imm =< 31, Imm =/= 0, Rd =/= zero, Rd =/= sp ->
+    encode_ci_type(16#1, Rd, Imm, 16#3);
+c_lui(Rd, _Imm) when Rd =:= zero; Rd =:= sp ->
+    error({invalid_compressed_instruction, c_lui, 'rd cannot be zero or sp'});
+c_lui(_Rd, 0) ->
+    error({invalid_compressed_instruction, c_lui, 'immediate cannot be zero'});
+c_lui(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -32, 31}).
+
+%% C.ADDI16SP - Compressed Add Immediate to SP (scaled by 16)
+%% sp = sp + imm (imm is 10-bit signed, must be multiple of 16, non-zero)
+%% Format: CI-type (special encoding)
+%% Encoding: funct3=011, rd/rs1=sp (x2), op=01
+-spec c_addi16sp(integer()) -> binary().
+c_addi16sp(0) ->
+    error({invalid_compressed_instruction, c_addi16sp, 'immediate cannot be zero'});
+c_addi16sp(Imm) when
+    Imm >= -512 andalso Imm =< 496 andalso (Imm rem 16) =:= 0
+->
+    % Pick the encoded bits out of the two's complement immediate, then lay
+    % them out as nzimm[9|4|6|8:7|5], which the CI encoder places at
+    % bits [12|6|5|4:3|2]
+    Bit9 = (Imm bsr 9) band 1,
+    Bit4 = (Imm bsr 4) band 1,
+    Bit6 = (Imm bsr 6) band 1,
+    Bits8_7 = (Imm bsr 7) band 3,
+    Bit5 = (Imm bsr 5) band 1,
+    <<Field:6>> = <<Bit9:1, Bit4:1, Bit6:1, Bits8_7:2, Bit5:1>>,
+    encode_ci_type(2#01, sp, Field, 2#011);
+c_addi16sp(Imm) when (Imm rem 16) =/= 0 ->
+    error({immediate_not_aligned, Imm, 16});
+c_addi16sp(Imm) ->
+    error({immediate_out_of_range, Imm, -512, 496}).
+
+%% C.ADDI4SPN - Compressed Add Immediate (scaled by 4) to SP, store in rd'
+%% rd' = sp + imm (10-bit unsigned immediate, non-zero multiple of 4)
+%% Format: CIW-type
+%% Encoding: funct3=000, op=00 (0x0)
+%% Errors are checked in this order: zero immediate, alignment, range.
+-spec c_addi4spn(riscv_register(), integer()) -> binary().
+c_addi4spn(Rd, Imm) when
+    Imm >= 4 andalso Imm =< 1020 andalso (Imm rem 4) =:= 0
+->
+    % Lay the bits out as nzuimm[5:4|9:6|2|3], which the CIW encoder places
+    % at bits [12:11|10:7|6|5]
+    Bits5_4 = (Imm bsr 4) band 3,
+    Bits9_6 = (Imm bsr 6) band 15,
+    Bit2 = (Imm bsr 2) band 1,
+    Bit3 = (Imm bsr 3) band 1,
+    <<Field:8>> = <<Bits5_4:2, Bits9_6:4, Bit2:1, Bit3:1>>,
+    encode_ciw_type(2#00, Rd, Field, 2#000);
+c_addi4spn(_Rd, 0) ->
+    error({invalid_compressed_instruction, c_addi4spn, 'immediate cannot be zero'});
+c_addi4spn(_Rd, Imm) when (Imm rem 4) =/= 0 ->
+    error({immediate_not_aligned, Imm, 4});
+c_addi4spn(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, 4, 1020}).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Shift Instructions
+%%-----------------------------------------------------------------------------
+%% NOTE(review): the shift encoders below accept shift amounts up to 63.
+%% On RV32 the C extension presumably requires shamt[5] = 0 - confirm whether
+%% amounts above 31 should be rejected for this target.
+
+%% C.SLLI - Compressed Shift Left Logical Immediate
+%% rd = rd << shamt (rd is full 5-bit register, shamt is 6-bit unsigned)
+%% Format: CI-type
+%% Encoding: funct3=000, op=10 (0x2)
+%% Raises for rd = zero, or {shift_amount_out_of_range, Shamt, 0, 63}.
+-spec c_slli(riscv_register(), 0..63) -> binary().
+c_slli(Rd, Shamt) when Shamt >= 0, Shamt =< 63, Rd =/= zero ->
+    encode_ci_type(16#2, Rd, Shamt, 16#0);
+c_slli(zero, _Shamt) ->
+    error({invalid_compressed_instruction, c_slli, 'rd cannot be zero'});
+c_slli(_Rd, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 63}).
+
+%% C.SRLI - Compressed Shift Right Logical Immediate
+%% rd' = rd' >> shamt (rd' uses 3-bit encoding, shamt is 6-bit unsigned)
+%% Format: CB-type (with special encoding)
+%% Encoding: funct3=100, shamt[5]=bit12, funct2=00, shamt[4:0]=bits 6:2, op=01
+%% Raises {shift_amount_out_of_range, Shamt, 0, 63} for other shift amounts.
+-spec c_srli(riscv_register(), 0..63) -> binary().
+c_srli(Rd, Shamt) when Shamt >= 0, Shamt =< 63 ->
+    RdNum = reg_to_c_num(Rd),
+    Shamt5 = (Shamt bsr 5) band 1,
+    Shamt4_0 = Shamt band 16#1F,
+    Instr =
+        (16#4 bsl 13) bor
+            (Shamt5 bsl 12) bor
+            % funct2 = 00 selects the logical right shift
+            (16#0 bsl 10) bor
+            (RdNum bsl 7) bor
+            (Shamt4_0 bsl 2) bor
+            16#1,
+    <<Instr:16/little>>;
+c_srli(_Rd, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 63}).
+
+%% C.SRAI - Compressed Shift Right Arithmetic Immediate
+%% rd' = rd' >> shamt (sign-extend, rd' uses 3-bit encoding, shamt is 6-bit unsigned)
+%% Format: CB-type (with special encoding)
+%% Encoding: funct3=100, shamt[5]=bit12, funct2=01, shamt[4:0]=bits 6:2, op=01
+-spec c_srai(riscv_register(), 0..63) -> binary().
+c_srai(Rd, Shamt) when Shamt >= 0, Shamt =< 63 ->
+    RdNum = reg_to_c_num(Rd),
+    Shamt5 = (Shamt bsr 5) band 1,
+    Shamt4_0 = Shamt band 16#1F,
+    Instr =
+        (16#4 bsl 13) bor
+            (Shamt5 bsl 12) bor
+            % funct2 = 01 selects the arithmetic right shift
+            (16#1 bsl 10) bor
+            (RdNum bsl 7) bor
+            (Shamt4_0 bsl 2) bor
+            16#1,
+    <<Instr:16/little>>;
+c_srai(_Rd, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 63}).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Load/Store Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.LW - Compressed Load Word
+%% rd' = mem[rs1' + offset] (both use 3-bit encoding, offset is 7-bit unsigned, multiple of 4)
+%% Format: CL-type
+%% Encoding: funct3=010, op=00 (0x0)
+%% The second argument is a {BaseRegister, Offset} tuple.
+%% Alignment is checked before range when reporting errors.
+-spec c_lw(riscv_register(), {riscv_register(), integer()}) -> binary().
+c_lw(Rd, {Rs1, Offset}) when
+    Offset >= 0, Offset =< 124, (Offset rem 4) =:= 0
+->
+    encode_cl_type(16#0, Rd, Rs1, Offset, 16#2);
+c_lw(_Rd, {_Rs1, Offset}) when (Offset rem 4) =/= 0 ->
+    error({offset_not_aligned, Offset, 4});
+c_lw(_Rd, {_Rs1, Offset}) ->
+    error({offset_out_of_range, Offset, 0, 124}).
+
+%% C.SW - Compressed Store Word
+%% mem[rs1' + offset] = rs2' (both use 3-bit encoding, offset is 7-bit unsigned, multiple of 4)
+%% Format: CS-type
+%% Encoding: funct3=110, op=00 (0x0)
+%% The first argument is the register to store, the second a
+%% {BaseRegister, Offset} tuple.
+%% Alignment is checked before range when reporting errors.
+-spec c_sw(riscv_register(), {riscv_register(), integer()}) -> binary().
+c_sw(Rs2, {Rs1, Offset}) when
+    Offset >= 0, Offset =< 124, (Offset rem 4) =:= 0
+->
+    encode_cs_type(16#0, Rs1, Rs2, Offset, 16#6);
+c_sw(_Rs2, {_Rs1, Offset}) when (Offset rem 4) =/= 0 ->
+    error({offset_not_aligned, Offset, 4});
+c_sw(_Rs2, {_Rs1, Offset}) ->
+    error({offset_out_of_range, Offset, 0, 124}).
+
+%% C.LWSP - Compressed Load Word from Stack Pointer
+%% rd = mem[sp + offset] (rd is full 5-bit register, offset is 8-bit unsigned, multiple of 4)
+%% Format: CI-type (special encoding)
+%% Encoding: funct3=010, op=10 (0x2)
+-spec c_lwsp(riscv_register(), integer()) -> binary().
+c_lwsp(zero, _Offset) ->
+    error({invalid_compressed_instruction, c_lwsp, 'rd cannot be zero'});
+c_lwsp(Rd, Offset) when
+    Offset >= 0 andalso Offset =< 252 andalso (Offset rem 4) =:= 0
+->
+    % Lay the bits out as offset[5|4:2|7:6], which the CI encoder places at
+    % bits [12|6:4|3:2]
+    Bit5 = (Offset bsr 5) band 1,
+    Bits4_2 = (Offset bsr 2) band 7,
+    Bits7_6 = (Offset bsr 6) band 3,
+    <<Field:6>> = <<Bit5:1, Bits4_2:3, Bits7_6:2>>,
+    encode_ci_type(2#10, Rd, Field, 2#010);
+c_lwsp(_Rd, Offset) when (Offset rem 4) =/= 0 ->
+    error({offset_not_aligned, Offset, 4});
+c_lwsp(_Rd, Offset) ->
+    error({offset_out_of_range, Offset, 0, 252}).
+
+%% C.SWSP - Compressed Store Word to Stack Pointer
+%% mem[sp + offset] = rs2 (full 5-bit register, 8-bit unsigned offset that is
+%% a multiple of 4)
+%% Format: CSS-type
+%% Encoding: funct3=110, op=10 (0x2)
+-spec c_swsp(riscv_register(), integer()) -> binary().
+c_swsp(Rs2, Offset) when
+    Offset >= 0 andalso Offset =< 252 andalso (Offset rem 4) =:= 0
+->
+    % Lay the bits out as offset[5:2|7:6], which the CSS encoder places at
+    % bits [12:9|8:7]
+    Bits5_2 = (Offset bsr 2) band 15,
+    Bits7_6 = (Offset bsr 6) band 3,
+    <<Field:6>> = <<Bits5_2:4, Bits7_6:2>>,
+    encode_css_type(2#10, Rs2, Field, 2#110);
+c_swsp(_Rs2, Offset) when (Offset rem 4) =/= 0 ->
+    error({offset_not_aligned, Offset, 4});
+c_swsp(_Rs2, Offset) ->
+    error({offset_out_of_range, Offset, 0, 252}).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Branch and Jump Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.BEQZ - Compressed Branch if Equal to Zero
+%% if (rs1' == 0) pc += offset (3-bit register encoding, 9-bit signed offset
+%% that is a multiple of 2)
+%% Format: CB-type
+%% Encoding: funct3=110, op=01 (0x1)
+-spec c_beqz(riscv_register(), integer()) -> binary().
+c_beqz(Rs1, Offset) when
+    Offset >= -256 andalso Offset =< 254 andalso (Offset rem 2) =:= 0
+->
+    encode_cb_type(2#01, Rs1, Offset, 2#110);
+c_beqz(_Rs1, Offset) when (Offset rem 2) =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+c_beqz(_Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -256, 254}).
+
+%% C.BNEZ - Compressed Branch if Not Equal to Zero
+%% if (rs1' != 0) pc += offset (3-bit register encoding, 9-bit signed offset
+%% that is a multiple of 2)
+%% Format: CB-type
+%% Encoding: funct3=111, op=01 (0x1)
+-spec c_bnez(riscv_register(), integer()) -> binary().
+c_bnez(Rs1, Offset) when
+    Offset >= -256 andalso Offset =< 254 andalso (Offset rem 2) =:= 0
+->
+    encode_cb_type(2#01, Rs1, Offset, 2#111);
+c_bnez(_Rs1, Offset) when (Offset rem 2) =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+c_bnez(_Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -256, 254}).
+
+%% C.J - Compressed Unconditional Jump
+%% pc += offset (12-bit signed offset that is a multiple of 2)
+%% Format: CJ-type
+%% Encoding: funct3=101, op=01 (0x1)
+-spec c_j(integer()) -> binary().
+c_j(Offset) when
+    Offset >= -2048 andalso Offset =< 2046 andalso (Offset rem 2) =:= 0
+->
+    encode_cj_type(2#01, Offset, 2#101);
+c_j(Offset) when (Offset rem 2) =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+c_j(Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2046}).
+
+%% C.JAL - Compressed Jump and Link (RV32C only, rd is implicitly ra)
+%% ra = pc + 2; pc += offset (12-bit signed offset that is a multiple of 2)
+%% Format: CJ-type
+%% Encoding: funct3=001 (0x1), op=01 (0x1)
+-spec c_jal(integer()) -> binary().
+c_jal(Offset) when
+    Offset >= -2048 andalso Offset =< 2046 andalso (Offset rem 2) =:= 0
+->
+    encode_cj_type(2#01, Offset, 2#001);
+c_jal(Offset) when (Offset rem 2) =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+c_jal(Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2046}).
+
+%% C.JR - Compressed Jump Register
+%% pc = rs1 (full 5-bit register, the zero register is rejected)
+%% Format: CR-type
+%% Encoding: funct4=1000 (0x8), rs2=x0, op=10 (0x2)
+-spec c_jr(riscv_register()) -> binary().
+c_jr(zero) ->
+    error({invalid_compressed_instruction, c_jr, 'rs1 cannot be zero'});
+c_jr(Rs1) ->
+    encode_cr_type(2#10, Rs1, zero, 2#1000).
+
+%% C.JALR - Compressed Jump and Link Register
+%% ra = pc + 2; pc = rs1 (full 5-bit register, the zero register is rejected)
+%% Format: CR-type
+%% Encoding: funct4=1001 (0x9), rs2=x0, op=10 (0x2)
+-spec c_jalr(riscv_register()) -> binary().
+c_jalr(zero) ->
+    error({invalid_compressed_instruction, c_jalr, 'rs1 cannot be zero'});
+c_jalr(Rs1) ->
+    encode_cr_type(2#10, Rs1, zero, 2#1001).
+
+%% C.EBREAK - Compressed Environment Breakpoint
+%% Raises a breakpoint exception
+%% Format: CR-type
+%% Encoding: funct4=1001 (0x9), rs1/rd=x0, rs2=x0, op=10 (0x2)
+-spec c_ebreak() -> binary().
+c_ebreak() ->
+    Funct4 = 2#1001,
+    encode_cr_type(2#10, zero, zero, Funct4).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Pseudo-instructions
+%%-----------------------------------------------------------------------------
+
+%% C.NOP - Compressed No Operation
+%% Expands to: c.addi x0, 0
+%% Format: CI-type
+%% Encoding: funct3=000, rd/rs1=x0, imm=0, op=01 (0x1)
+%% Encoded directly because c_addi/2 refuses the zero register.
+-spec c_nop() -> binary().
+c_nop() ->
+    Funct3 = 2#000,
+    encode_ci_type(2#01, zero, 0, Funct3).
diff --git a/src/libAtomVM/defaultatoms.def b/src/libAtomVM/defaultatoms.def index 35330fdecc..db7914438b 100644 --- a/src/libAtomVM/defaultatoms.def +++ b/src/libAtomVM/defaultatoms.def @@ -209,3 +209,4 @@ X(LOAD_ATOM, "\x4", "load") X(JIT_X86_64_ATOM, "\xA", "jit_x86_64") X(JIT_AARCH64_ATOM, "\xB", "jit_aarch64") X(JIT_ARMV6M_ATOM, "\xA", "jit_armv6m") +X(JIT_RISCV32_ATOM, "\xB", "jit_riscv32") diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index 009f7795af..a087abce54 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -86,6 +86,20 @@ _Static_assert(offsetof(JITState, remaining_reductions) == 0x8, "jit_state->rema _Static_assert(sizeof(size_t) == 4, "size_t is expected to be 32 bits"); +#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32 +_Static_assert(offsetof(Context, e) == 0x14, "ctx->e is 0x14 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, x) == 0x18, "ctx->x is 0x18 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, cp) == 0x5C, "ctx->cp is 0x5C in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, fr) == 0x60, "ctx->fr is 0x60 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, bs) == 0x64, "ctx->bs is 0x64 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, bs_offset) == 0x68, "ctx->bs_offset is 0x68 in jit/src/jit_riscv32.erl"); + +_Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(JITState, continuation) == 0x4, "jit_state->continuation is 0x4 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(JITState, remaining_reductions) == 0x8, "jit_state->remaining_reductions is 0x8 in jit/src/jit_riscv32.erl"); + +_Static_assert(sizeof(size_t) == 4, "size_t is expected to be 32 bits"); + #else #error Unknown jit target #endif @@ -133,7 +147,7 @@ static void destroy_extended_registers(Context *ctx, unsigned int live) static void jit_trim_live_regs(Context *ctx, uint32_t live) { - TRACE("jit_trim_live_regs: 
ctx->process_id = %d, live = %d\n", ctx->process_id, live); + TRACE("jit_trim_live_regs: ctx->process_id = %" PRId32 ", live = %" PRIu32 "\n", ctx->process_id, live); if (UNLIKELY(!list_is_empty(&ctx->extended_x_regs))) { destroy_extended_registers(ctx, live); } @@ -173,8 +187,8 @@ static Context *jit_return(Context *ctx, JITState *jit_state) static Context *jit_terminate_context(Context *ctx, JITState *jit_state) { - TRACE("jit_terminate_context: ctx->process_id = %d\n", ctx->process_id); - TRACE("-- Code execution finished for %i--\n", ctx->process_id); + TRACE("jit_terminate_context: ctx->process_id = %" PRId32 "\n", ctx->process_id); + TRACE("-- Code execution finished for %" PRId32 "--\n", ctx->process_id); GlobalContext *global = ctx->global; if (ctx->leader) { scheduler_stop_all(global); @@ -186,7 +200,7 @@ static Context *jit_terminate_context(Context *ctx, JITState *jit_state) static Context *jit_handle_error(Context *ctx, JITState *jit_state, int offset) { - TRACE("jit_terminate_context: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset); + TRACE("jit_terminate_context: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); if (offset || term_is_invalid_term(ctx->x[2])) { ctx->x[2] = stacktrace_create_raw(ctx, jit_state->module, offset, ctx->x[0]); } @@ -255,14 +269,14 @@ static void set_error(Context *ctx, JITState *jit_state, int offset, term error_ static Context *jit_raise_error(Context *ctx, JITState *jit_state, int offset, term error_type_atom) { - TRACE("jit_raise_error: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset); + TRACE("jit_raise_error: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); set_error(ctx, jit_state, offset, error_type_atom); return jit_handle_error(ctx, jit_state, 0); } static Context *jit_raise_error_tuple(Context *ctx, JITState *jit_state, int offset, term error_atom, term arg1) { - TRACE("jit_raise_error_tuple: ctx->process_id = %d, offset = %d\n", 
ctx->process_id, offset); + TRACE("jit_raise_error_tuple: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); // We can gc as we are raising if (UNLIKELY(memory_ensure_free_with_roots(ctx, TUPLE_SIZE(2), 1, &arg1, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { set_error(ctx, jit_state, offset, OUT_OF_MEMORY_ATOM); @@ -279,7 +293,7 @@ static Context *jit_raise_error_tuple(Context *ctx, JITState *jit_state, int off static Context *jit_raise(Context *ctx, JITState *jit_state, int offset, term stacktrace, term exc_value) { - TRACE("jit_raise: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset); + TRACE("jit_raise: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); ctx->x[0] = stacktrace_exception_class(stacktrace); ctx->x[1] = exc_value; ctx->x[2] = stacktrace_create_raw(ctx, jit_state->module, offset, stacktrace); @@ -288,7 +302,7 @@ static Context *jit_raise(Context *ctx, JITState *jit_state, int offset, term st static Context *jit_schedule_next_cp(Context *ctx, JITState *jit_state) { - TRACE("jit_schedule_next_cp: ctx->process_id = %d\n", ctx->process_id); + TRACE("jit_schedule_next_cp: ctx->process_id = %" PRId32 "\n", ctx->process_id); ctx->saved_function_ptr = jit_state->continuation; ctx->saved_module = jit_state->module; jit_state->remaining_reductions = 0; @@ -297,7 +311,7 @@ static Context *jit_schedule_next_cp(Context *ctx, JITState *jit_state) static Context *jit_schedule_wait_cp(Context *ctx, JITState *jit_state) { - TRACE("jit_schedule_wait_cp: ctx->process_id = %d\n", ctx->process_id); + TRACE("jit_schedule_wait_cp: ctx->process_id = %" PRId32 "\n", ctx->process_id); ctx->saved_function_ptr = jit_state->continuation; ctx->saved_module = jit_state->module; jit_state->remaining_reductions = 0; @@ -435,7 +449,7 @@ static Context *jit_call_ext(Context *ctx, JITState *jit_state, int offset, int return_value = bif->bif2_ptr(ctx, 0, ctx->x[0], ctx->x[1]); break; default: - fprintf(stderr, "Invalid arity %" PRIu32 " 
for bif\n", arity); + fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", (uint32_t) arity); AVM_ABORT(); } PROCESS_MAYBE_TRAP_RETURN_VALUE_LAST(return_value, offset); @@ -465,7 +479,7 @@ static Context *jit_call_ext(Context *ctx, JITState *jit_state, int offset, int return_value = gcbif->gcbif2_ptr(ctx, 0, 0, ctx->x[0], ctx->x[1]); break; default: - fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", arity); + fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", (uint32_t) arity); AVM_ABORT(); } PROCESS_MAYBE_TRAP_RETURN_VALUE_LAST(return_value, offset); @@ -488,7 +502,7 @@ static term jit_module_get_atom_term_by_id(JITState *jit_state, int atom_index) static bool jit_allocate(Context *ctx, JITState *jit_state, uint32_t stack_need, uint32_t heap_need, uint32_t live) { - TRACE("jit_allocate: stack_need=%u heap_need=%u live=%u\n", stack_need, heap_need, live); + TRACE("jit_allocate: ENTRY ctx=%p jit_state=%p stack_need=%" PRIu32 " heap_need=%" PRIu32 " live=%" PRIu32 "\n", (void *) ctx, (void *) jit_state, stack_need, heap_need, live); if (ctx->heap.root->next || ((ctx->heap.heap_ptr + heap_need > ctx->e - (stack_need + 1)))) { TRIM_LIVE_REGS(live); if (UNLIKELY(memory_ensure_free_with_roots(ctx, heap_need + stack_need + 1, live, ctx->x, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { @@ -503,7 +517,7 @@ static bool jit_allocate(Context *ctx, JITState *jit_state, uint32_t stack_need, static BifImpl0 jit_get_imported_bif(JITState *jit_state, uint32_t bif) { - TRACE("jit_get_imported_bif: bif=%u\n", bif); + TRACE("jit_get_imported_bif: bif=%" PRIu32 "\n", bif); const struct ExportedFunction *exported_bif = jit_state->module->imported_funcs[bif]; const BifImpl0 result = EXPORTED_FUNCTION_TO_BIF(exported_bif)->bif0_ptr; return result; @@ -511,7 +525,7 @@ static BifImpl0 jit_get_imported_bif(JITState *jit_state, uint32_t bif) static bool jit_deallocate(Context *ctx, JITState *jit_state, uint32_t n_words) { - TRACE("jit_deallocate: n_words=%u\n", n_words); + 
TRACE("jit_deallocate: n_words=%" PRIu32 "\n", n_words); ctx->cp = ctx->e[n_words]; ctx->e += n_words + 1; // Hopefully, we only need x[0] @@ -536,7 +550,7 @@ static TermCompareResult jit_term_compare(Context *ctx, JITState *jit_state, ter static bool jit_test_heap(Context *ctx, JITState *jit_state, uint32_t heap_need, uint32_t live_registers) { - TRACE("jit_test_heap: heap_need=%u live_registers=%u\n", heap_need, live_registers); + TRACE("jit_test_heap: heap_need=%" PRIu32 " live_registers=%" PRIu32 "\n", heap_need, live_registers); size_t heap_free = context_avail_free_memory(ctx); // if we need more heap space than is currently free, then try to GC the needed space if (heap_free < heap_need) { @@ -550,7 +564,7 @@ static bool jit_test_heap(Context *ctx, JITState *jit_state, uint32_t heap_need, } else if (heap_free > heap_need * HEAP_NEED_GC_SHRINK_THRESHOLD_COEFF) { TRIM_LIVE_REGS(live_registers); if (UNLIKELY(memory_ensure_free_with_roots(ctx, heap_need * (HEAP_NEED_GC_SHRINK_THRESHOLD_COEFF / 2), live_registers, ctx->x, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { - TRACE("Unable to ensure free memory. heap_need=%i\n", heap_need); + TRACE("Unable to ensure free memory. 
heap_need=%" PRIu32 "\n", heap_need); set_error(ctx, jit_state, 0, OUT_OF_MEMORY_ATOM); return false; } @@ -661,13 +675,13 @@ static term jit_alloc_big_integer_fragment( static term jit_term_alloc_tuple(Context *ctx, uint32_t size) { - TRACE("jit_term_alloc_tuple: size=%u\n", size); + TRACE("jit_term_alloc_tuple: size=%" PRIu32 "\n", size); return term_alloc_tuple(size, &ctx->heap); } static term jit_term_alloc_fun(Context *ctx, JITState *jit_state, uint32_t fun_index, uint32_t numfree) { - TRACE("jit_term_alloc_fun: fun_index=%u numfree=%u\n", fun_index, numfree); + TRACE("jit_term_alloc_fun: fun_index=%" PRIu32 " numfree=%" PRIu32 "\n", fun_index, numfree); size_t size = numfree + BOXED_FUN_SIZE; term *boxed_func = memory_heap_alloc(&ctx->heap, size); @@ -873,7 +887,7 @@ static Context *jit_process_signal_messages(Context *ctx, JITState *jit_state) static term jit_mailbox_peek(Context *ctx) { - TRACE("jit_mailbox_peek: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_mailbox_peek: ctx->process_id=%" PRId32 "\n", ctx->process_id); term out = term_invalid_term(); mailbox_peek(ctx, &out); return out; @@ -881,26 +895,26 @@ static term jit_mailbox_peek(Context *ctx) static void jit_mailbox_remove_message(Context *ctx) { - TRACE("jit_mailbox_remove_message: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_mailbox_remove_message: ctx->process_id=%" PRId32 "\n", ctx->process_id); mailbox_remove_message(&ctx->mailbox, &ctx->heap); } static void jit_timeout(Context *ctx) { - TRACE("jit_timeout: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_timeout: ctx->process_id=%" PRId32 "\n", ctx->process_id); context_update_flags(ctx, ~WaitingTimeoutExpired, NoFlags); mailbox_reset(&ctx->mailbox); } static void jit_mailbox_next(Context *ctx) { - TRACE("jit_mailbox_next: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_mailbox_next: ctx->process_id=%" PRId32 "\n", ctx->process_id); mailbox_next(&ctx->mailbox); } static void jit_cancel_timeout(Context *ctx) { - 
TRACE("jit_cancel_timeout: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_cancel_timeout: ctx->process_id=%" PRId32 "\n", ctx->process_id); if (context_get_flags(ctx, WaitingTimeout | WaitingTimeoutExpired)) { scheduler_cancel_timeout(ctx); } @@ -908,7 +922,7 @@ static void jit_cancel_timeout(Context *ctx) static void jit_clear_timeout_flag(Context *ctx) { - TRACE("jit_clear_timeout_flag: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_clear_timeout_flag: ctx->process_id=%" PRId32 "\n", ctx->process_id); context_update_flags(ctx, ~WaitingTimeoutExpired, NoFlags); } diff --git a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h index ec11860a86..ee22796ce6 100644 --- a/src/libAtomVM/jit.h +++ b/src/libAtomVM/jit.h @@ -175,6 +175,7 @@ enum TrapAndLoadResult #define JIT_ARCH_X86_64 1 #define JIT_ARCH_AARCH64 2 #define JIT_ARCH_ARMV6M 3 +#define JIT_ARCH_RISCV32 4 #define JIT_VARIANT_PIC 1 #define JIT_VARIANT_FLOAT32 2 @@ -196,6 +197,11 @@ enum TrapAndLoadResult #define JIT_JUMPTABLE_ENTRY_SIZE 12 #endif +#if defined(__riscv) && (__riscv_xlen == 32) +#define JIT_ARCH_TARGET JIT_ARCH_RISCV32 +#define JIT_JUMPTABLE_ENTRY_SIZE 8 +#endif + #ifndef JIT_ARCH_TARGET #error Unknown JIT target #endif diff --git a/src/libAtomVM/module.c b/src/libAtomVM/module.c index 9618370be5..d3c7f2f44a 100644 --- a/src/libAtomVM/module.c +++ b/src/libAtomVM/module.c @@ -38,6 +38,9 @@ #include #include +// #define ENABLE_TRACE +#include "trace.h" + #ifdef WITH_ZLIB #include #endif diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c index d6b5ef8bcd..5e60b02bc6 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -5703,6 +5703,8 @@ static term nif_jit_backend_module(Context *ctx, int argc, term argv[]) return JIT_AARCH64_ATOM; #elif JIT_ARCH_TARGET == JIT_ARCH_ARMV6M return JIT_ARMV6M_ATOM; +#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32 + return JIT_RISCV32_ATOM; #else #error Unknown JIT target #endif diff --git a/src/libAtomVM/opcodesswitch.h b/src/libAtomVM/opcodesswitch.h 
index d1fdeacb4f..f3d3de3152 100644 --- a/src/libAtomVM/opcodesswitch.h +++ b/src/libAtomVM/opcodesswitch.h @@ -7599,7 +7599,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) } terminate_context: - TRACE("-- Code execution finished for %i--\n", ctx->process_id); + TRACE("-- Code execution finished for %i--\n", (int) ctx->process_id); GlobalContext *global = ctx->global; if (ctx->leader) { scheduler_stop_all(global); diff --git a/src/platforms/esp32/CMakeLists.txt b/src/platforms/esp32/CMakeLists.txt index 9dec6ec5f3..4ddc362924 100644 --- a/src/platforms/esp32/CMakeLists.txt +++ b/src/platforms/esp32/CMakeLists.txt @@ -51,8 +51,21 @@ endif() # On Esp32, select is run in a loop in a dedicated task set(AVM_SELECT_IN_TASK ON) -# JIT is not available yet on esp32 -set(AVM_DISABLE_JIT ON) +# JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4) +# Configuration comes from idf.py menuconfig (KConfig), not CMake options +if(CONFIG_JIT_ENABLED) + if (${IDF_TARGET} MATCHES "esp32c2|esp32c3|esp32c6|esp32h2|esp32p4") + set(AVM_DISABLE_JIT OFF) + set(AVM_JIT_TARGET_ARCH riscv32) + message(STATUS "JIT compilation enabled for ${IDF_TARGET} (RISC-V32)") + else() + message(WARNING "JIT is not supported on ${IDF_TARGET} (Xtensa architecture)") + set(AVM_DISABLE_JIT ON) + endif() +else() + set(AVM_DISABLE_JIT ON) + message(STATUS "JIT compilation disabled") +endif() project(atomvm-esp32) diff --git a/src/platforms/esp32/components/avm_sys/CMakeLists.txt b/src/platforms/esp32/components/avm_sys/CMakeLists.txt index ebcedd3b57..8156bb2ac8 100644 --- a/src/platforms/esp32/components/avm_sys/CMakeLists.txt +++ b/src/platforms/esp32/components/avm_sys/CMakeLists.txt @@ -25,6 +25,7 @@ set(AVM_SYS_COMPONENT_SRCS "sys.c" "platform_nifs.c" "platform_defaultatoms.c" + "jit_stream_flash.c" "../../../../libAtomVM/inet.c" "../../../../libAtomVM/otp_crypto.c" "../../../../libAtomVM/otp_net.c" diff --git 
a/src/platforms/esp32/components/avm_sys/jit_stream_flash.c b/src/platforms/esp32/components/avm_sys/jit_stream_flash.c new file mode 100644 index 0000000000..77dfcca908 --- /dev/null +++ b/src/platforms/esp32/components/avm_sys/jit_stream_flash.c @@ -0,0 +1,34 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#ifndef AVM_NO_JIT + +#include "context.h" +#include "jit.h" +#include "term.h" + +ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) +{ + UNUSED(ctx); + UNUSED(jit_stream); + return NULL; +} + +#endif diff --git a/src/platforms/esp32/components/avm_sys/sys.c b/src/platforms/esp32/components/avm_sys/sys.c index f222c88025..6cbd5bbc85 100644 --- a/src/platforms/esp32/components/avm_sys/sys.c +++ b/src/platforms/esp32/components/avm_sys/sys.c @@ -811,3 +811,28 @@ void sys_mbedtls_ctr_drbg_context_unlock(GlobalContext *global) UNUSED(global); #endif } + +#ifndef AVM_NO_JIT +#include + +ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset) +{ + UNUSED(size); + uintptr_t addr = (uintptr_t) (native_code + offset); + +#if defined(CONFIG_IDF_TARGET_ARCH_RISCV) + // On RISC-V ESP32 targets, native code in flash needs to be accessed + // through the instruction cache (IROM) not data cache (DROM) +#if defined(CONFIG_IDF_TARGET_ESP32C3) || 
defined(CONFIG_IDF_TARGET_ESP32C2) + // ESP32-C3 and C2 have separate DROM and IROM regions + if (addr >= SOC_DROM_LOW && addr < SOC_DROM_HIGH) { + // Convert from data cache address to instruction cache address + addr = addr - SOC_DROM_LOW + SOC_IROM_LOW; + } +#endif + // ESP32-C6, H2, and P4 have unified DROM/IROM, no conversion needed +#endif + + return (ModuleNativeEntryPoint) addr; +} +#endif diff --git a/src/platforms/esp32/components/libatomvm/CMakeLists.txt b/src/platforms/esp32/components/libatomvm/CMakeLists.txt index 97580dbfea..00595afeef 100644 --- a/src/platforms/esp32/components/libatomvm/CMakeLists.txt +++ b/src/platforms/esp32/components/libatomvm/CMakeLists.txt @@ -31,7 +31,12 @@ if (HAVE_PLATFORM_ATOMIC_H) target_include_directories(libAtomVM PUBLIC ../avm_sys/) endif() -target_link_libraries(${COMPONENT_LIB} - INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init") +if (AVM_DISABLE_JIT) + target_link_libraries(${COMPONENT_LIB} + INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init") +else() + target_link_libraries(${COMPONENT_LIB} + INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init" "-u jit_stream_entry_point" "-u sys_map_native_code") +endif() target_compile_features(${COMPONENT_LIB} INTERFACE c_std_11) diff --git a/src/platforms/esp32/main/Kconfig.projbuild b/src/platforms/esp32/main/Kconfig.projbuild index 88bf92aa1a..1eba944ed7 100755 --- a/src/platforms/esp32/main/Kconfig.projbuild +++ b/src/platforms/esp32/main/Kconfig.projbuild @@ -39,5 +39,11 @@ menu "AtomVM configuration" depends on USE_USB_SERIAL help Enable TinyUSB CDC functionality if USE_USB_SERIAL is enabled. 
+ + config JIT_ENABLED + bool "Enable just in time compilation" + default n + help + Enable Just in time compilation, or just execution of precompiled native code endmenu diff --git a/src/platforms/esp32/test/CMakeLists.txt b/src/platforms/esp32/test/CMakeLists.txt index 2d97d91345..27402ee83a 100644 --- a/src/platforms/esp32/test/CMakeLists.txt +++ b/src/platforms/esp32/test/CMakeLists.txt @@ -57,8 +57,21 @@ endif() # On Esp32, select is run in a loop in a dedicated task set(AVM_SELECT_IN_TASK ON) -# JIT is not available yet on esp32 -set(AVM_DISABLE_JIT ON) +# JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4) +# Configuration comes from idf.py menuconfig (KConfig), not CMake options +if(CONFIG_JIT_ENABLED) + if (${IDF_TARGET} MATCHES "esp32c2|esp32c3|esp32c6|esp32h2|esp32p4") + set(AVM_DISABLE_JIT OFF) + set(AVM_JIT_TARGET_ARCH riscv32) + message(STATUS "JIT compilation enabled for ${IDF_TARGET} (RISC-V32)") + else() + message(WARNING "JIT is not supported on ${IDF_TARGET} (Xtensa architecture)") + set(AVM_DISABLE_JIT ON) + endif() +else() + set(AVM_DISABLE_JIT ON) + message(STATUS "JIT compilation disabled") +endif() project(atomvm-esp32-test) diff --git a/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt b/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt index e2d67269e8..dc4789f374 100644 --- a/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt +++ b/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt @@ -20,11 +20,31 @@ add_library(esp32_test_modules) +include(ExternalProject) +if(NOT AVM_DISABLE_JIT) +set(host_atomvm_jit_target "--target=jit") +else() +set(host_atomvm_jit_target "") +endif() ExternalProject_Add(HostAtomVM SOURCE_DIR ../../../../../../../../ INSTALL_COMMAND cmake -E echo "Skipping install step." + BUILD_COMMAND cmake --build . 
--target=atomvmlib ${host_atomvm_jit_target} --target=PackBEAM ) +macro(jit_precompile module_name) + if(NOT AVM_DISABLE_JIT) + add_custom_command( + OUTPUT ${AVM_JIT_TARGET_ARCH}/${module_name}.beam + COMMAND mkdir -p ${AVM_JIT_TARGET_ARCH} + && erl -pa HostAtomVM-prefix/src/HostAtomVM-build/libs/jit/src/beams/ -noshell -s jit_precompile -s init stop -- ${AVM_JIT_TARGET_ARCH} ${AVM_JIT_TARGET_ARCH}/ ${module_name}.beam + DEPENDS ${module_name}.beam HostAtomVM + COMMENT "Compiling ${module_name}.beam to ${AVM_JIT_TARGET_ARCH}" + VERBATIM + ) + endif() +endmacro() + function(compile_erlang module_name) add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam" @@ -33,6 +53,7 @@ function(compile_erlang module_name) COMMENT "Compiling ${module_name}.erl" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) + jit_precompile(${module_name}) set_property(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam") endfunction() @@ -55,46 +76,44 @@ compile_erlang(test_time_and_processes) compile_erlang(test_twdt) compile_erlang(test_tz) +set(erlang_test_beams + test_esp_partition.beam + test_file.beam + test_wifi_example.beam + test_list_to_atom.beam + test_list_to_binary.beam + test_md5.beam + test_crypto.beam + test_monotonic_time.beam + test_mount.beam + test_net.beam + test_rtc_slow.beam + test_select.beam + test_socket.beam + test_ssl.beam + test_time_and_processes.beam + test_twdt.beam + test_tz.beam +) + +if(NOT AVM_DISABLE_JIT) + set(erlang_test_beams_${AVM_JIT_TARGET_ARCH} ${erlang_test_beams}) + list(TRANSFORM erlang_test_beams_${AVM_JIT_TARGET_ARCH} PREPEND ${AVM_JIT_TARGET_ARCH}/) + set(erlang_test_beams_to_package ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}}) + set(erlang_test_beams_depends ${erlang_test_beams} ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}}) +else() + set(erlang_test_beams_to_package ${erlang_test_beams}) + set(erlang_test_beams_depends ${erlang_test_beams}) 
+endif() + add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/esp32_test_modules.avm" COMMAND HostAtomVM-prefix/src/HostAtomVM-build/tools/packbeam/PackBEAM -i esp32_test_modules.avm HostAtomVM-prefix/src/HostAtomVM-build/libs/atomvmlib.avm - test_esp_partition.beam - test_file.beam - test_wifi_example.beam - test_list_to_atom.beam - test_list_to_binary.beam - test_md5.beam - test_crypto.beam - test_monotonic_time.beam - test_mount.beam - test_net.beam - test_rtc_slow.beam - test_select.beam - test_socket.beam - test_ssl.beam - test_time_and_processes.beam - test_twdt.beam - test_tz.beam + ${erlang_test_beams_to_package} DEPENDS HostAtomVM - "${CMAKE_CURRENT_BINARY_DIR}/test_esp_partition.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_wifi_example.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_file.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_list_to_atom.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_list_to_binary.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_md5.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_crypto.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_monotonic_time.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_mount.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_net.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_rtc_slow.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_select.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_socket.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_ssl.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_time_and_processes.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_twdt.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_tz.beam" + ${erlang_test_beams_depends} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} VERBATIM ) diff --git a/src/platforms/rp2/CMakeLists.txt b/src/platforms/rp2/CMakeLists.txt index deb315c1a8..86e5e6683a 100644 --- a/src/platforms/rp2/CMakeLists.txt +++ b/src/platforms/rp2/CMakeLists.txt @@ -71,8 +71,13 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "^cortex-m.+$") if (NOT AVM_DISABLE_JIT) set(AVM_JIT_TARGET_ARCH "armv6m") endif() +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^hazard3$") + # Pico2 
RISC-V processor (Hazard3) + if (NOT AVM_DISABLE_JIT) + set(AVM_JIT_TARGET_ARCH "riscv32") + endif() else() - # Typically riscv is not supported yet + # Other processors not supported yet if (NOT AVM_DISABLE_JIT) message("JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}") set(AVM_DISABLE_JIT ON CACHE BOOL "Disable just in time compilation." FORCE) diff --git a/src/platforms/rp2/tests/test_erl_sources/CMakeLists.txt b/src/platforms/rp2/tests/test_erl_sources/CMakeLists.txt index b203d168b2..5c6526d5e2 100644 --- a/src/platforms/rp2/tests/test_erl_sources/CMakeLists.txt +++ b/src/platforms/rp2/tests/test_erl_sources/CMakeLists.txt @@ -21,12 +21,15 @@ include(ExternalProject) if(NOT AVM_DISABLE_JIT) set(host_atomvm_jit_target "--target=jit") +set(atomvlib_name "atomvmlib-${AVM_JIT_TARGET_ARCH}.avm") else() set(host_atomvm_jit_target "") +set(atomvlib_name "atomvmlib.avm") endif() ExternalProject_Add(HostAtomVM SOURCE_DIR ../../../../../../ INSTALL_COMMAND cmake -E echo "Skipping install step." + CMAKE_ARGS -DAVM_DISABLE_JIT=${AVM_DISABLE_JIT} BUILD_COMMAND cmake --build . 
--target=atomvmlib ${host_atomvm_jit_target} --target=PackBEAM --target=UF2Tool ) @@ -67,15 +70,13 @@ set(erlang_test_beams ) if(NOT AVM_DISABLE_JIT) - set(erlang_test_beams_${AVM_JIT_TARGET_ARCH} ${erlang_test_beams}) - list(TRANSFORM erlang_test_beams_${AVM_JIT_TARGET_ARCH} PREPEND ${AVM_JIT_TARGET_ARCH}/) - list(APPEND erlang_test_beams ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}}) + list(TRANSFORM erlang_test_beams PREPEND ${AVM_JIT_TARGET_ARCH}/) endif() add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/rp2_test_modules.avm" COMMAND HostAtomVM-prefix/src/HostAtomVM-build/tools/packbeam/PackBEAM -i rp2_test_modules.avm - HostAtomVM-prefix/src/HostAtomVM-build/libs/atomvmlib.avm + HostAtomVM-prefix/src/HostAtomVM-build/libs/${atomvlib_name} ${erlang_test_beams} DEPENDS HostAtomVM diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt index 26ab6b4ecc..45473d9f10 100644 --- a/tests/libs/jit/CMakeLists.txt +++ b/tests/libs/jit/CMakeLists.txt @@ -30,6 +30,8 @@ set(ERLANG_MODULES jit_aarch64_asm_tests jit_armv6m_tests jit_armv6m_asm_tests + jit_riscv32_tests + jit_riscv32_asm_tests jit_x86_64_tests jit_x86_64_asm_tests ) diff --git a/tests/libs/jit/jit_riscv32_asm_tests.erl b/tests/libs/jit/jit_riscv32_asm_tests.erl new file mode 100644 index 0000000000..fbba1f8937 --- /dev/null +++ b/tests/libs/jit/jit_riscv32_asm_tests.erl @@ -0,0 +1,982 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_riscv32_asm_tests). + +-include_lib("eunit/include/eunit.hrl"). + +-define(_assertAsmEqual(Bin, Str, Value), + ?_assertEqual(jit_tests_common:asm(riscv32, Bin, Str), Value) +). + +%%----------------------------------------------------------------------------- +%% R-type arithmetic and logical instruction tests +%%----------------------------------------------------------------------------- + +add_test_() -> + [ + ?_assertAsmEqual( + <<16#00628533:32/little>>, "add a0, t0, t1", jit_riscv32_asm:add(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#95aa:16/little>>, "add a1, a1, a0", jit_riscv32_asm:add(a1, a1, a0) + ), + ?_assertAsmEqual( + <<16#97fa:16/little>>, "add a5, a5, t5", jit_riscv32_asm:add(a5, a5, t5) + ) + ]. + +sub_test_() -> + [ + ?_assertAsmEqual( + <<16#40628533:32/little>>, "sub a0, t0, t1", jit_riscv32_asm:sub(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#8d89:16/little>>, "sub a1, a1, a0", jit_riscv32_asm:sub(a1, a1, a0) + ), + ?_assertAsmEqual( + <<16#41e787b3:32/little>>, "sub a5, a5, t5", jit_riscv32_asm:sub(a5, a5, t5) + ) + ]. + +and_test_() -> + [ + ?_assertAsmEqual( + <<16#0062f533:32/little>>, "and a0, t0, t1", jit_riscv32_asm:and_(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#8df1:16/little>>, "and a1, a1, a2", jit_riscv32_asm:and_(a1, a1, a2) + ) + ]. + +or_test_() -> + [ + ?_assertAsmEqual( + <<16#0062e533:32/little>>, "or a0, t0, t1", jit_riscv32_asm:or_(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#8dd1:16/little>>, "or a1, a1, a2", jit_riscv32_asm:or_(a1, a1, a2) + ) + ]. + +xor_test_() -> + [ + ?_assertAsmEqual( + <<16#0062c533:32/little>>, "xor a0, t0, t1", jit_riscv32_asm:xor_(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#8db1:16/little>>, "xor a1, a1, a2", jit_riscv32_asm:xor_(a1, a1, a2) + ) + ]. 
+ +sll_test_() -> + [ + ?_assertAsmEqual( + <<16#00629533:32/little>>, "sll a0, t0, t1", jit_riscv32_asm:sll(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c59633:32/little>>, "sll a2, a1, a2", jit_riscv32_asm:sll(a2, a1, a2) + ) + ]. + +srl_test_() -> + [ + ?_assertAsmEqual( + <<16#0062d533:32/little>>, "srl a0, t0, t1", jit_riscv32_asm:srl(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c5d633:32/little>>, "srl a2, a1, a2", jit_riscv32_asm:srl(a2, a1, a2) + ) + ]. + +sra_test_() -> + [ + ?_assertAsmEqual( + <<16#4062d533:32/little>>, "sra a0, t0, t1", jit_riscv32_asm:sra(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#40c5d633:32/little>>, "sra a2, a1, a2", jit_riscv32_asm:sra(a2, a1, a2) + ) + ]. + +slt_test_() -> + [ + ?_assertAsmEqual( + <<16#0062a533:32/little>>, "slt a0, t0, t1", jit_riscv32_asm:slt(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c5a633:32/little>>, "slt a2, a1, a2", jit_riscv32_asm:slt(a2, a1, a2) + ) + ]. + +sltu_test_() -> + [ + ?_assertAsmEqual( + <<16#0062b533:32/little>>, "sltu a0, t0, t1", jit_riscv32_asm:sltu(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c5b633:32/little>>, "sltu a2, a1, a2", jit_riscv32_asm:sltu(a2, a1, a2) + ) + ]. + +%%----------------------------------------------------------------------------- +%% I-type immediate instruction tests +%%----------------------------------------------------------------------------- + +addi_test_() -> + [ + ?_assertAsmEqual( + <<16#01428513:32/little>>, "addi a0, t0, 20", jit_riscv32_asm:addi(a0, t0, 20) + ), + ?_assertAsmEqual( + <<16#15fd:16/little>>, "addi a1, a1, -1", jit_riscv32_asm:addi(a1, a1, -1) + ), + ?_assertAsmEqual( + <<16#7ff00513:32/little>>, "addi a0, zero, 2047", jit_riscv32_asm:addi(a0, zero, 2047) + ), + ?_assertAsmEqual( + <<16#80000593:32/little>>, "addi a1, zero, -2048", jit_riscv32_asm:addi(a1, zero, -2048) + ) + ]. 
+ +andi_test_() -> + [ + ?_assertAsmEqual( + <<16#0ff2f513:32/little>>, "andi a0, t0, 255", jit_riscv32_asm:andi(a0, t0, 255) + ), + ?_assertAsmEqual( + <<16#89bd:16/little>>, "andi a1, a1, 15", jit_riscv32_asm:andi(a1, a1, 15) + ) + ]. + +ori_test_() -> + [ + ?_assertAsmEqual( + <<16#0ff2e513:32/little>>, "ori a0, t0, 255", jit_riscv32_asm:ori(a0, t0, 255) + ), + ?_assertAsmEqual( + <<16#00f5e593:32/little>>, "ori a1, a1, 15", jit_riscv32_asm:ori(a1, a1, 15) + ) + ]. + +xori_test_() -> + [ + ?_assertAsmEqual( + <<16#0ff2c513:32/little>>, "xori a0, t0, 255", jit_riscv32_asm:xori(a0, t0, 255) + ), + ?_assertAsmEqual( + <<16#fff5c593:32/little>>, "xori a1, a1, -1", jit_riscv32_asm:xori(a1, a1, -1) + ) + ]. + +slli_test_() -> + [ + ?_assertAsmEqual( + <<16#00329513:32/little>>, "slli a0, t0, 3", jit_riscv32_asm:slli(a0, t0, 3) + ), + ?_assertAsmEqual( + <<16#05fe:16/little>>, "slli a1, a1, 31", jit_riscv32_asm:slli(a1, a1, 31) + ), + ?_assertAsmEqual( + <<16#00051513:32/little>>, "slli a0, a0, 0", jit_riscv32_asm:slli(a0, a0, 0) + ) + ]. + +srli_test_() -> + [ + ?_assertAsmEqual( + <<16#0032d513:32/little>>, "srli a0, t0, 3", jit_riscv32_asm:srli(a0, t0, 3) + ), + ?_assertAsmEqual( + <<16#81fd:16/little>>, "srli a1, a1, 31", jit_riscv32_asm:srli(a1, a1, 31) + ) + ]. + +srai_test_() -> + [ + ?_assertAsmEqual( + <<16#4032d513:32/little>>, "srai a0, t0, 3", jit_riscv32_asm:srai(a0, t0, 3) + ), + ?_assertAsmEqual( + <<16#85fd:16/little>>, "srai a1, a1, 31", jit_riscv32_asm:srai(a1, a1, 31) + ) + ]. + +slti_test_() -> + [ + ?_assertAsmEqual( + <<16#0142a513:32/little>>, "slti a0, t0, 20", jit_riscv32_asm:slti(a0, t0, 20) + ), + ?_assertAsmEqual( + <<16#fff5a593:32/little>>, "slti a1, a1, -1", jit_riscv32_asm:slti(a1, a1, -1) + ) + ]. + +sltiu_test_() -> + [ + ?_assertAsmEqual( + <<16#0142b513:32/little>>, "sltiu a0, t0, 20", jit_riscv32_asm:sltiu(a0, t0, 20) + ), + ?_assertAsmEqual( + <<16#00153513:32/little>>, "sltiu a0, a0, 1", jit_riscv32_asm:sltiu(a0, a0, 1) + ) + ]. 
+ +%%----------------------------------------------------------------------------- +%% Load instruction tests +%%----------------------------------------------------------------------------- + +lw_test_() -> + [ + ?_assertAsmEqual(<<16#4108:16/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0, 0)), + ?_assertAsmEqual(<<16#4108:16/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0)), + ?_assertAsmEqual(<<16#414c:16/little>>, "lw a1, 4(a0)", jit_riscv32_asm:lw(a1, a0, 4)), + ?_assertAsmEqual( + <<16#ffc52503:32/little>>, "lw a0, -4(a0)", jit_riscv32_asm:lw(a0, a0, -4) + ), + ?_assertAsmEqual( + <<16#7ff52503:32/little>>, "lw a0, 2047(a0)", jit_riscv32_asm:lw(a0, a0, 2047) + ) + ]. + +lh_test_() -> + [ + ?_assertAsmEqual(<<16#00051503:32/little>>, "lh a0, 0(a0)", jit_riscv32_asm:lh(a0, a0, 0)), + ?_assertAsmEqual(<<16#00051503:32/little>>, "lh a0, 0(a0)", jit_riscv32_asm:lh(a0, a0)), + ?_assertAsmEqual(<<16#00251583:32/little>>, "lh a1, 2(a0)", jit_riscv32_asm:lh(a1, a0, 2)) + ]. + +lhu_test_() -> + [ + ?_assertAsmEqual( + <<16#00055503:32/little>>, "lhu a0, 0(a0)", jit_riscv32_asm:lhu(a0, a0, 0) + ), + ?_assertAsmEqual(<<16#00055503:32/little>>, "lhu a0, 0(a0)", jit_riscv32_asm:lhu(a0, a0)), + ?_assertAsmEqual(<<16#00255583:32/little>>, "lhu a1, 2(a0)", jit_riscv32_asm:lhu(a1, a0, 2)) + ]. + +lb_test_() -> + [ + ?_assertAsmEqual(<<16#00050503:32/little>>, "lb a0, 0(a0)", jit_riscv32_asm:lb(a0, a0, 0)), + ?_assertAsmEqual(<<16#00050503:32/little>>, "lb a0, 0(a0)", jit_riscv32_asm:lb(a0, a0)), + ?_assertAsmEqual(<<16#00150583:32/little>>, "lb a1, 1(a0)", jit_riscv32_asm:lb(a1, a0, 1)) + ]. + +lbu_test_() -> + [ + ?_assertAsmEqual( + <<16#00054503:32/little>>, "lbu a0, 0(a0)", jit_riscv32_asm:lbu(a0, a0, 0) + ), + ?_assertAsmEqual(<<16#00054503:32/little>>, "lbu a0, 0(a0)", jit_riscv32_asm:lbu(a0, a0)), + ?_assertAsmEqual(<<16#00154583:32/little>>, "lbu a1, 1(a0)", jit_riscv32_asm:lbu(a1, a0, 1)) + ]. 
+ +%%----------------------------------------------------------------------------- +%% Store instruction tests +%%----------------------------------------------------------------------------- + +sw_test_() -> + [ + ?_assertAsmEqual(<<16#c10c:16/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a0, a1, 0)), + ?_assertAsmEqual(<<16#c10c:16/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a1, a0)), + ?_assertAsmEqual(<<16#c14c:16/little>>, "sw a1, 4(a0)", jit_riscv32_asm:sw(a0, a1, 4)), + ?_assertAsmEqual(<<16#feb52e23:32/little>>, "sw a1, -4(a0)", jit_riscv32_asm:sw(a0, a1, -4)) + ]. + +sh_test_() -> + [ + ?_assertAsmEqual(<<16#00b51023:32/little>>, "sh a1, 0(a0)", jit_riscv32_asm:sh(a0, a1, 0)), + ?_assertAsmEqual(<<16#00b51023:32/little>>, "sh a1, 0(a0)", jit_riscv32_asm:sh(a1, a0)), + ?_assertAsmEqual(<<16#00b51123:32/little>>, "sh a1, 2(a0)", jit_riscv32_asm:sh(a0, a1, 2)) + ]. + +sb_test_() -> + [ + ?_assertAsmEqual(<<16#00b50023:32/little>>, "sb a1, 0(a0)", jit_riscv32_asm:sb(a0, a1, 0)), + ?_assertAsmEqual(<<16#00b50023:32/little>>, "sb a1, 0(a0)", jit_riscv32_asm:sb(a1, a0)), + ?_assertAsmEqual(<<16#00b500a3:32/little>>, "sb a1, 1(a0)", jit_riscv32_asm:sb(a0, a1, 1)) + ]. 
+ +%%----------------------------------------------------------------------------- +%% Branch instruction tests +%%----------------------------------------------------------------------------- + +beq_test_() -> + [ + ?_assertAsmEqual( + <<16#00628463:32/little>>, "beq t0, t1, .+8", jit_riscv32_asm:beq(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb50ee3:32/little>>, "beq a0, a1, .-4", jit_riscv32_asm:beq(a0, a1, -4) + ), + % Test c.beqz (compressed reg with zero) + ?_assertAsmEqual( + <<16#c101:16/little>>, "beq a0, zero, .", jit_riscv32_asm:beq(a0, zero, 0) + ), + ?_assertAsmEqual( + <<16#cf81:16/little>>, "beq a5, zero, .+24", jit_riscv32_asm:beq(a5, zero, 24) + ), + % Test beq with non-compressed reg and zero (falls through to encode_b_type) + ?_assertAsmEqual( + <<16#00030463:32/little>>, "beq t1, zero, .+8", jit_riscv32_asm:beq(t1, zero, 8) + ) + ]. + +bne_test_() -> + [ + ?_assertAsmEqual( + <<16#00629463:32/little>>, "bne t0, t1, .+8", jit_riscv32_asm:bne(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb51ee3:32/little>>, "bne a0, a1, .-4", jit_riscv32_asm:bne(a0, a1, -4) + ), + % Test c.bnez (compressed reg with zero) + ?_assertAsmEqual( + <<16#ea11:16/little>>, "bne a2, zero, .+20", jit_riscv32_asm:bne(a2, zero, 20) + ), + ?_assertAsmEqual( + <<16#fffd:16/little>>, "bne a5, zero, .-2", jit_riscv32_asm:bne(a5, zero, -2) + ), + % Test bne with non-compressed reg and zero (falls through to encode_b_type) + ?_assertAsmEqual( + <<16#00031463:32/little>>, "bne t1, zero, .+8", jit_riscv32_asm:bne(t1, zero, 8) + ) + ]. + +blt_test_() -> + [ + ?_assertAsmEqual( + <<16#0062c463:32/little>>, "blt t0, t1, .+8", jit_riscv32_asm:blt(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb54ee3:32/little>>, "blt a0, a1, .-4", jit_riscv32_asm:blt(a0, a1, -4) + ) + ]. 
+
+%% bge/3: one forward (+8) and one backward (-4) branch, both 32-bit.
+bge_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062d463:32/little>>, "bge t0, t1, .+8", jit_riscv32_asm:bge(t0, t1, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#feb55ee3:32/little>>, "bge a0, a1, .-4", jit_riscv32_asm:bge(a0, a1, -4)
+        )
+    ].
+
+%% bltu/3: one forward (+8) and one backward (-4) branch, both 32-bit.
+bltu_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062e463:32/little>>, "bltu t0, t1, .+8", jit_riscv32_asm:bltu(t0, t1, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#feb56ee3:32/little>>, "bltu a0, a1, .-4", jit_riscv32_asm:bltu(a0, a1, -4)
+        )
+    ].
+
+%% bgeu/3: one forward (+8) and one backward (-4) branch, both 32-bit.
+bgeu_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062f463:32/little>>, "bgeu t0, t1, .+8", jit_riscv32_asm:bgeu(t0, t1, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#feb57ee3:32/little>>, "bgeu a0, a1, .-4", jit_riscv32_asm:bgeu(a0, a1, -4)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% Jump instruction tests
+%%-----------------------------------------------------------------------------
+
+%% jal/2: 16-bit encodings when the link register is ra or zero, 32-bit
+%% encodings for other link registers and for the extreme offsets. The last
+%% two cases exercise call/2, which yields an auipc + jalr pair.
+jal_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#2021:16/little>>, "jal .+8", jit_riscv32_asm:jal(ra, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#3ff5:16/little>>, "jal .-4", jit_riscv32_asm:jal(ra, -4)
+        ),
+        % Test c.j (jal zero, offset)
+        ?_assertAsmEqual(
+            <<16#a011:16/little>>, "j .+4", jit_riscv32_asm:jal(zero, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#bffd:16/little>>, "j .-2", jit_riscv32_asm:jal(zero, -2)
+        ),
+        % Test full J-type encoding (encode_j_type): same small offsets as above,
+        % but with a link register other than ra/zero
+        ?_assertAsmEqual(
+            <<16#008005ef:32/little>>, "jal a1, .+8", jit_riscv32_asm:jal(a1, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#ffdffa6f:32/little>>, "jal s4, .-4", jit_riscv32_asm:jal(s4, -4)
+        ),
+        % Test with maximum positive offset (1048574)
+        ?_assertAsmEqual(
+            <<16#7ffff56f:32/little>>, "jal a0, .+1048574", jit_riscv32_asm:jal(a0, 1048574)
+        ),
+        % Test with maximum negative offset (-1048576)
+        ?_assertAsmEqual(
+            <<16#800005ef:32/little>>, "jal a1, .-1048576", jit_riscv32_asm:jal(a1, -1048576)
+        ),
+        % call/2 with offset 0: auipc followed by a 16-bit jalr
+        ?_assertAsmEqual(
+            <<16#00000517:32/little, 16#9502:16/little>>,
+            "auipc a0, 0\njalr a0",
+            jit_riscv32_asm:call(a0, 0)
+        ),
+        % call/2 with offset 0x1800: auipc 0x2 followed by jalr with -2048
+        ?_assertAsmEqual(
+            <<16#00002517:32/little, 16#800500e7:32/little>>,
+            "auipc a0, 0x2\njalr -2048(a0)",
+            jit_riscv32_asm:call(a0, 16#1800)
+        )
+    ].
+
+%% jalr/2,3: jalr/2 and jalr/3 with offset 0 give the same 16-bit encoding;
+%% a non-zero offset gives a 32-bit encoding.
+jalr_test_() ->
+    [
+        ?_assertAsmEqual(<<16#9502:16/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0, 0)),
+        ?_assertAsmEqual(<<16#9502:16/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0)),
+        ?_assertAsmEqual(<<16#004500e7:32/little>>, "jalr 4(a0)", jit_riscv32_asm:jalr(ra, a0, 4))
+    ].
+
+%%-----------------------------------------------------------------------------
+%% Upper immediate instruction tests
+%%-----------------------------------------------------------------------------
+
+%% lui/2: all three cases expect 16-bit encodings; an immediate of -1 is
+%% written 0xfffff in the assembly text.
+lui_test_() ->
+    [
+        ?_assertAsmEqual(<<16#65c9:16/little>>, "lui a1, 18", jit_riscv32_asm:lui(a1, 18)),
+        ?_assertAsmEqual(<<16#6505:16/little>>, "lui a0, 1", jit_riscv32_asm:lui(a0, 1)),
+        ?_assertAsmEqual(<<16#75fd:16/little>>, "lui a1, 0xfffff", jit_riscv32_asm:lui(a1, -1))
+    ].
+
+%% auipc/2: both cases expect 32-bit encodings.
+auipc_test_() ->
+    [
+        ?_assertAsmEqual(<<16#00012597:32/little>>, "auipc a1, 18", jit_riscv32_asm:auipc(a1, 18)),
+        ?_assertAsmEqual(<<16#00001517:32/little>>, "auipc a0, 1", jit_riscv32_asm:auipc(a0, 1))
+    ].
+
+%%-----------------------------------------------------------------------------
+%% Pseudo-instruction tests
+%%-----------------------------------------------------------------------------
+
+%% nop/0: expects the 4-byte encoding 0x00000013.
+nop_test_() ->
+    [
+        % We want a 4-byte NOP for padding, so use .option norvc to force non-compressed
+        ?_assertAsmEqual(<<16#00000013:32/little>>, ".option norvc\nnop", jit_riscv32_asm:nop())
+    ].
+
+%% li/2: covers every expansion the expected bytes show - a single 16-bit
+%% word, a single 32-bit word, a 16-bit + 32-bit pair, a 32-bit + 16-bit pair,
+%% and lui-only forms in both 16-bit and 32-bit widths.
+li_test_() ->
+    [
+        ?_assertAsmEqual(<<16#4529:16/little>>, "li a0, 10", jit_riscv32_asm:li(a0, 10)),
+        ?_assertAsmEqual(<<16#557d:16/little>>, "li a0, -1", jit_riscv32_asm:li(a0, -1)),
+        ?_assertAsmEqual(<<16#7ff00513:32/little>>, "li a0, 2047", jit_riscv32_asm:li(a0, 2047)),
+
+        % 0x12345 = 74565 - requires lui + addi
+        ?_assertAsmEqual(
+            <<16#6549:16/little, 16#34550513:32/little>>,
+            "li a0, 0x12345",
+            jit_riscv32_asm:li(a0, 16#12345)
+        ),
+        % 0x7FFFFFFF = 2147483647 (maximum 32-bit signed)
+        ?_assertAsmEqual(
+            <<16#80000537:32/little, 16#157d:16/little>>,
+            "li a0, 0x7fffffff",
+            jit_riscv32_asm:li(a0, 16#7FFFFFFF)
+        ),
+        % 0xFFFFFFFF = 4294967295 (maximum 32-bit unsigned, interpreted as -1 signed)
+        ?_assertAsmEqual(
+            <<16#fff00513:32/little>>,
+            "li a0, 0xffffffff",
+            jit_riscv32_asm:li(a0, 16#FFFFFFFF)
+        ),
+        % Test lui-only cases (lower 12 bits are zero)
+        % 0x80000000 = 2147483648 (unsigned), -2147483648 (signed)
+        ?_assertAsmEqual(
+            <<16#800005b7:32/little>>,
+            "li a1, 0x80000000",
+            jit_riscv32_asm:li(a1, 16#80000000)
+        ),
+        % Same value as signed negative
+        ?_assertAsmEqual(
+            <<16#800005b7:32/little>>,
+            "li a1, -0x80000000",
+            jit_riscv32_asm:li(a1, -16#80000000)
+        ),
+        % 0x100000 = 1048576 (lower 12 bits zero)
+        ?_assertAsmEqual(
+            <<16#00100537:32/little>>,
+            "li a0, 0x100000",
+            jit_riscv32_asm:li(a0, 16#100000)
+        ),
+        % Test c.lui cases (2 bytes, -32 to 31, lower 12 bits zero)
+        % 0x1000 = 4096 (upper = 1)
+        ?_assertAsmEqual(
+            <<16#6505:16/little>>,
+            "li a0, 0x1000",
+            jit_riscv32_asm:li(a0, 16#1000)
+        ),
+        % 0x1f000 = 126976 (upper = 31, max for c.lui)
+        ?_assertAsmEqual(
+            <<16#657d:16/little>>,
+            "li a0, 0x1f000",
+            jit_riscv32_asm:li(a0, 16#1f000)
+        ),
+        % -0x20000 = -131072, i.e. 0xfffe0000 as a 32-bit word (upper = -32, min for c.lui)
+        ?_assertAsmEqual(
+            <<16#7501:16/little>>,
+            "li a0, -0x20000",
+            jit_riscv32_asm:li(a0, -16#20000)
+        ),
+        % 0x20000 = 131072 (upper = 32, just outside c.lui range, needs full lui)
+        ?_assertAsmEqual(
+            <<16#00020537:32/little>>,
+            "li a0, 0x20000",
+            jit_riscv32_asm:li(a0, 16#20000)
+        )
+    ].
+
+%% mv/2: both cases use the same register as source and destination and
+%% expect 16-bit encodings.
+mv_test_() ->
+    [
+        ?_assertAsmEqual(<<16#852a:16/little>>, "mv a0, a0", jit_riscv32_asm:mv(a0, a0)),
+        ?_assertAsmEqual(<<16#85ae:16/little>>, "mv a1, a1", jit_riscv32_asm:mv(a1, a1))
+    ].
+
+%% not_/2: in-place complement of a0 and a1, 32-bit encodings.
+not_test_() ->
+    [
+        ?_assertAsmEqual(<<16#fff54513:32/little>>, "not a0, a0", jit_riscv32_asm:not_(a0, a0)),
+        ?_assertAsmEqual(<<16#fff5c593:32/little>>, "not a1, a1", jit_riscv32_asm:not_(a1, a1))
+    ].
+
+%% neg/2: in-place negation of a0 and a1, 32-bit encodings.
+neg_test_() ->
+    [
+        ?_assertAsmEqual(<<16#40a00533:32/little>>, "neg a0, a0", jit_riscv32_asm:neg(a0, a0)),
+        ?_assertAsmEqual(<<16#40b005b3:32/little>>, "neg a1, a1", jit_riscv32_asm:neg(a1, a1))
+    ].
+
+%% j/1: one forward (+8) and one backward (-4) jump, both 16-bit.
+j_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#a021:16/little>>, "j .+8", jit_riscv32_asm:j(8)
+        ),
+        ?_assertAsmEqual(
+            <<16#bff5:16/little>>, "j .-4", jit_riscv32_asm:j(-4)
+        )
+    ].
+
+%% jr/1: register-indirect jump through a0 and t0, both 16-bit.
+jr_test_() ->
+    [
+        ?_assertAsmEqual(<<16#8502:16/little>>, "jr a0", jit_riscv32_asm:jr(a0)),
+        ?_assertAsmEqual(<<16#8282:16/little>>, "jr t0", jit_riscv32_asm:jr(t0))
+    ].
+
+%% ret/0: expects the 16-bit encoding 0x8082.
+ret_test_() ->
+    [
+        ?_assertAsmEqual(<<16#8082:16/little>>, "ret", jit_riscv32_asm:ret())
+    ].
+
+%%-----------------------------------------------------------------------------
+%% M Extension (Multiply/Divide) instruction tests
+%%-----------------------------------------------------------------------------
+
+%% mul/3: four in-place multiplies (destination equals first source) with
+%% varying second sources, all 32-bit.
+mul_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#02f50533:32/little>>, "mul a0, a0, a5", jit_riscv32_asm:mul(a0, a0, a5)
+        ),
+        ?_assertAsmEqual(
+            <<16#03f60633:32/little>>, "mul a2, a2, t6", jit_riscv32_asm:mul(a2, a2, t6)
+        ),
+        ?_assertAsmEqual(
+            <<16#026585b3:32/little>>, "mul a1, a1, t1", jit_riscv32_asm:mul(a1, a1, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#02d282b3:32/little>>, "mul t0, t0, a3", jit_riscv32_asm:mul(t0, t0, a3)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% System instruction tests
+%%-----------------------------------------------------------------------------
+
+%% c_ebreak/0: expects the 16-bit encoding 0x9002.
+c_ebreak_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#9002:16/little>>, "c.ebreak", jit_riscv32_asm:c_ebreak()
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Arithmetic and Logical instruction tests
+%%-----------------------------------------------------------------------------
+
+%% The tests in this section call the c_* functions directly; every expected
+%% value is a single 16-bit little-endian word.
+
+%% c_add/2: three destination/source register pairs.
+c_add_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#9532:16/little>>, "c.add a0, a2", jit_riscv32_asm:c_add(a0, a2)
+        ),
+        ?_assertAsmEqual(
+            <<16#95be:16/little>>, "c.add a1, a5", jit_riscv32_asm:c_add(a1, a5)
+        ),
+        ?_assertAsmEqual(
+            <<16#9522:16/little>>, "c.add a0, s0", jit_riscv32_asm:c_add(a0, s0)
+        )
+    ].
+
+%% c_mv/2: three destination/source register pairs.
+c_mv_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8532:16/little>>, "c.mv a0, a2", jit_riscv32_asm:c_mv(a0, a2)
+        ),
+        ?_assertAsmEqual(
+            <<16#85be:16/little>>, "c.mv a1, a5", jit_riscv32_asm:c_mv(a1, a5)
+        ),
+        ?_assertAsmEqual(
+            <<16#842a:16/little>>, "c.mv s0, a0", jit_riscv32_asm:c_mv(s0, a0)
+        )
+    ].
+
+%% c_sub/2: two cases with identical operands and one with distinct registers.
+c_sub_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8d09:16/little>>, "c.sub a0, a0", jit_riscv32_asm:c_sub(a0, a0)
+        ),
+        ?_assertAsmEqual(
+            <<16#8d8d:16/little>>, "c.sub a1, a1", jit_riscv32_asm:c_sub(a1, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c0d:16/little>>, "c.sub s0, a1", jit_riscv32_asm:c_sub(s0, a1)
+        )
+    ].
+
+%% c_and/2: three destinations (a0, a5, s0), all with a1 as the source.
+c_and_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8d6d:16/little>>, "c.and a0, a1", jit_riscv32_asm:c_and(a0, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8fed:16/little>>, "c.and a5, a1", jit_riscv32_asm:c_and(a5, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c6d:16/little>>, "c.and s0, a1", jit_riscv32_asm:c_and(s0, a1)
+        )
+    ].
+
+%% c_or/2: three destinations (a0, a5, s0), all with a1 as the source; each
+%% expected value is a single 16-bit little-endian word.
+c_or_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8d4d:16/little>>, "c.or a0, a1", jit_riscv32_asm:c_or(a0, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8fcd:16/little>>, "c.or a5, a1", jit_riscv32_asm:c_or(a5, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c4d:16/little>>, "c.or s0, a1", jit_riscv32_asm:c_or(s0, a1)
+        )
+    ].
+
+%% c_xor/2: three destinations (a0, a5, s0), all with a1 as the source.
+c_xor_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8d2d:16/little>>, "c.xor a0, a1", jit_riscv32_asm:c_xor(a0, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8fad:16/little>>, "c.xor a5, a1", jit_riscv32_asm:c_xor(a5, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c2d:16/little>>, "c.xor s0, a1", jit_riscv32_asm:c_xor(s0, a1)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Immediate instruction tests
+%%-----------------------------------------------------------------------------
+
+%% c_addi/2: positive and negative immediates (4, -1, 16, -8).
+c_addi_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0511:16/little>>, "c.addi a0, 4", jit_riscv32_asm:c_addi(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#15fd:16/little>>, "c.addi a1, -1", jit_riscv32_asm:c_addi(a1, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#0541:16/little>>, "c.addi a0, 16", jit_riscv32_asm:c_addi(a0, 16)
+        ),
+        ?_assertAsmEqual(
+            <<16#1561:16/little>>, "c.addi a0, -8", jit_riscv32_asm:c_addi(a0, -8)
+        )
+    ].
+
+%% c_andi/2: immediates 10, -1 and 16.
+c_andi_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8929:16/little>>, "c.andi a0, 10", jit_riscv32_asm:c_andi(a0, 10)
+        ),
+        ?_assertAsmEqual(
+            <<16#99fd:16/little>>, "c.andi a1, -1", jit_riscv32_asm:c_andi(a1, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8941:16/little>>, "c.andi a0, 16", jit_riscv32_asm:c_andi(a0, 16)
+        )
+    ].
+
+%% c_li/2: immediates 10, -1, 1 and -32.
+c_li_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#4529:16/little>>, "c.li a0, 10", jit_riscv32_asm:c_li(a0, 10)
+        ),
+        ?_assertAsmEqual(
+            <<16#55fd:16/little>>, "c.li a1, -1", jit_riscv32_asm:c_li(a1, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#4505:16/little>>, "c.li a0, 1", jit_riscv32_asm:c_li(a0, 1)
+        ),
+        ?_assertAsmEqual(
+            <<16#5501:16/little>>, "c.li a0, -32", jit_riscv32_asm:c_li(a0, -32)
+        )
+    ].
+
+%% c_lui/2: immediates 10, -1 (written 0xfffff in the assembly text) and 1;
+%% each expected value is a single 16-bit little-endian word.
+c_lui_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#6529:16/little>>, "c.lui a0, 10", jit_riscv32_asm:c_lui(a0, 10)
+        ),
+        ?_assertAsmEqual(
+            <<16#75fd:16/little>>, "c.lui a1, 0xfffff", jit_riscv32_asm:c_lui(a1, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#6505:16/little>>, "c.lui a0, 1", jit_riscv32_asm:c_lui(a0, 1)
+        )
+    ].
+
+%% c_addi16sp/1: stack pointer adjustments of 16, -512 and 80.
+c_addi16sp_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#6141:16/little>>, "c.addi16sp sp, 16", jit_riscv32_asm:c_addi16sp(16)
+        ),
+        ?_assertAsmEqual(
+            <<16#7101:16/little>>, "c.addi16sp sp, -512", jit_riscv32_asm:c_addi16sp(-512)
+        ),
+        ?_assertAsmEqual(
+            <<16#6161:16/little>>, "c.addi16sp sp, 80", jit_riscv32_asm:c_addi16sp(80)
+        )
+    ].
+
+%% c_addi4spn/2: sp-relative immediates 4, 32 and 1020.
+c_addi4spn_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0048:16/little>>, "c.addi4spn a0, sp, 4", jit_riscv32_asm:c_addi4spn(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#1010:16/little>>, "c.addi4spn a2, sp, 32", jit_riscv32_asm:c_addi4spn(a2, 32)
+        ),
+        ?_assertAsmEqual(
+            <<16#1ffc:16/little>>,
+            "c.addi4spn a5, sp, 1020",
+            jit_riscv32_asm:c_addi4spn(a5, 1020)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Shift instruction tests
+%%-----------------------------------------------------------------------------
+
+%% c_slli/2: shift amounts 3, 31 and 16.
+c_slli_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#050e:16/little>>, "c.slli a0, 3", jit_riscv32_asm:c_slli(a0, 3)
+        ),
+        ?_assertAsmEqual(
+            <<16#05fe:16/little>>, "c.slli a1, 31", jit_riscv32_asm:c_slli(a1, 31)
+        ),
+        ?_assertAsmEqual(
+            <<16#0542:16/little>>, "c.slli a0, 16", jit_riscv32_asm:c_slli(a0, 16)
+        )
+    ].
+
+%% c_srli/2: shift amounts 3, 31 and 16.
+c_srli_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#810d:16/little>>, "c.srli a0, 3", jit_riscv32_asm:c_srli(a0, 3)
+        ),
+        ?_assertAsmEqual(
+            <<16#81fd:16/little>>, "c.srli a1, 31", jit_riscv32_asm:c_srli(a1, 31)
+        ),
+        ?_assertAsmEqual(
+            <<16#8141:16/little>>, "c.srli a0, 16", jit_riscv32_asm:c_srli(a0, 16)
+        )
+    ].
+
+%% c_srai/2: shift amounts 3, 31 and 16; each expected value is a single
+%% 16-bit little-endian word.
+c_srai_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#850d:16/little>>, "c.srai a0, 3", jit_riscv32_asm:c_srai(a0, 3)
+        ),
+        ?_assertAsmEqual(
+            <<16#85fd:16/little>>, "c.srai a1, 31", jit_riscv32_asm:c_srai(a1, 31)
+        ),
+        ?_assertAsmEqual(
+            <<16#8541:16/little>>, "c.srai a0, 16", jit_riscv32_asm:c_srai(a0, 16)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Load/Store instruction tests
+%%-----------------------------------------------------------------------------
+
+%% c_lw/2: the address operand is a {BaseReg, Offset} tuple; offsets 0, 4
+%% and 124 are covered.
+c_lw_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#4188:16/little>>, "c.lw a0, 0(a1)", jit_riscv32_asm:c_lw(a0, {a1, 0})
+        ),
+        ?_assertAsmEqual(
+            <<16#41d8:16/little>>, "c.lw a4, 4(a1)", jit_riscv32_asm:c_lw(a4, {a1, 4})
+        ),
+        ?_assertAsmEqual(
+            <<16#5ffc:16/little>>, "c.lw a5, 124(a5)", jit_riscv32_asm:c_lw(a5, {a5, 124})
+        )
+    ].
+
+%% c_sw/2: same operand shapes and offsets as c_lw_test_/0.
+c_sw_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#c188:16/little>>, "c.sw a0, 0(a1)", jit_riscv32_asm:c_sw(a0, {a1, 0})
+        ),
+        ?_assertAsmEqual(
+            <<16#c1d8:16/little>>, "c.sw a4, 4(a1)", jit_riscv32_asm:c_sw(a4, {a1, 4})
+        ),
+        ?_assertAsmEqual(
+            <<16#dffc:16/little>>, "c.sw a5, 124(a5)", jit_riscv32_asm:c_sw(a5, {a5, 124})
+        )
+    ].
+
+%% c_lwsp/2: sp-relative loads; the second argument is the bare offset
+%% (0, 4, 252).
+c_lwsp_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#4502:16/little>>, "c.lwsp a0, 0(sp)", jit_riscv32_asm:c_lwsp(a0, 0)
+        ),
+        ?_assertAsmEqual(
+            <<16#4512:16/little>>, "c.lwsp a0, 4(sp)", jit_riscv32_asm:c_lwsp(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#50fe:16/little>>, "c.lwsp ra, 252(sp)", jit_riscv32_asm:c_lwsp(ra, 252)
+        )
+    ].
+
+%% c_swsp/2: sp-relative stores; the second argument is the bare offset
+%% (0, 4, 252).
+c_swsp_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#c02a:16/little>>, "c.swsp a0, 0(sp)", jit_riscv32_asm:c_swsp(a0, 0)
+        ),
+        ?_assertAsmEqual(
+            <<16#c22a:16/little>>, "c.swsp a0, 4(sp)", jit_riscv32_asm:c_swsp(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#dfe6:16/little>>, "c.swsp s9, 252(sp)", jit_riscv32_asm:c_swsp(s9, 252)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Branch and Jump instruction tests
+%%-----------------------------------------------------------------------------
+
+%% Every expected value in this section is a single 16-bit little-endian word.
+
+%% c_beqz/2: forward (+4), backward (-6) and zero offsets.
+c_beqz_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#c111:16/little>>, "c.beqz a0, .+4", jit_riscv32_asm:c_beqz(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#dced:16/little>>, "c.beqz s1, .-6", jit_riscv32_asm:c_beqz(s1, -6)
+        ),
+        ?_assertAsmEqual(
+            <<16#c101:16/little>>, "c.beqz a0, .", jit_riscv32_asm:c_beqz(a0, 0)
+        )
+    ].
+
+%% c_bnez/2: forward (+4), backward (-6) and zero offsets.
+c_bnez_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#e111:16/little>>, "c.bnez a0, .+4", jit_riscv32_asm:c_bnez(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#fced:16/little>>, "c.bnez s1, .-6", jit_riscv32_asm:c_bnez(s1, -6)
+        ),
+        ?_assertAsmEqual(
+            <<16#e101:16/little>>, "c.bnez a0, .", jit_riscv32_asm:c_bnez(a0, 0)
+        )
+    ].
+
+%% c_j/1: forward (+4), backward (-6) and zero offsets.
+c_j_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#a011:16/little>>, "c.j .+4", jit_riscv32_asm:c_j(4)
+        ),
+        ?_assertAsmEqual(
+            <<16#bfed:16/little>>, "c.j .-6", jit_riscv32_asm:c_j(-6)
+        ),
+        ?_assertAsmEqual(
+            <<16#a001:16/little>>, "c.j .", jit_riscv32_asm:c_j(0)
+        )
+    ].
+
+%% c_jal/1: forward (+8), backward (-4) and zero offsets.
+c_jal_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#2021:16/little>>, "c.jal .+8", jit_riscv32_asm:c_jal(8)
+        ),
+        ?_assertAsmEqual(
+            <<16#3ff5:16/little>>, "c.jal .-4", jit_riscv32_asm:c_jal(-4)
+        ),
+        ?_assertAsmEqual(
+            <<16#2001:16/little>>, "c.jal .", jit_riscv32_asm:c_jal(0)
+        )
+    ].
+
+%% c_jr/1: register-indirect jump through a0, s0 and ra.
+c_jr_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8502:16/little>>, "c.jr a0", jit_riscv32_asm:c_jr(a0)
+        ),
+        ?_assertAsmEqual(
+            <<16#8402:16/little>>, "c.jr s0", jit_riscv32_asm:c_jr(s0)
+        ),
+        ?_assertAsmEqual(
+            <<16#8082:16/little>>, "c.jr ra", jit_riscv32_asm:c_jr(ra)
+        )
+    ].
+
+%% c_jalr/1: register-indirect call through a0 and s0.
+c_jalr_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#9502:16/little>>, "c.jalr a0", jit_riscv32_asm:c_jalr(a0)
+        ),
+        ?_assertAsmEqual(
+            <<16#9402:16/little>>, "c.jalr s0", jit_riscv32_asm:c_jalr(s0)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Pseudo-instruction tests
+%%-----------------------------------------------------------------------------
+
+%% c_nop/0: expects the 16-bit encoding 0x0001.
+c_nop_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0001:16/little>>, "c.nop", jit_riscv32_asm:c_nop()
+        )
+    ].
diff --git a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl
new file mode 100644
index 0000000000..21ce325526
--- /dev/null
+++ b/tests/libs/jit/jit_riscv32_tests.erl
@@ -0,0 +1,3433 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(jit_riscv32_tests).
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+-endif.
+
+-include("jit/include/jit.hrl").
+-include("jit/src/term.hrl").
+-include("jit/src/default_atoms.hrl").
+-include("jit/src/primitives.hrl").
+
+% Backend module under test; the tests in this module call it through this macro.
+-define(BACKEND, jit_riscv32).
+
+% The expected listings in this module are objdump-style disassembly text; each
+% one is passed through dump_to_bin/1 (defined outside this section) and
+% compared with the emitted stream.
+% NOTE(review): this header used to give the command as
+%   arm-elf-objdump -b binary -D dump.bin -M arm
+% yet the listings here are RISC-V; presumably the line was carried over from
+% another backend's tests - confirm the command actually used.
+
+% Calls primitive index 0 with [ctx, jit_state] on a fresh PIC-variant state,
+% expects the result in t6 and checks the emitted code against the listing.
+call_primitive_0_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {AfterCall, Reg} = ?BACKEND:call_primitive(Fresh, 0, [ctx, jit_state]),
+    ?assertEqual(t6, Reg),
+    Emitted = ?BACKEND:stream(AfterCall),
+    ?assertEqual(
+        dump_to_bin(<<
+            " 0: 00062f83 lw t6,0(a2)\n"
+            " 4: 1141 addi sp,sp,-16\n"
+            " 6: c006 sw ra,0(sp)\n"
+            " 8: c22a sw a0,4(sp)\n"
+            " a: c42e sw a1,8(sp)\n"
+            " c: c632 sw a2,12(sp)\n"
+            " e: 9f82 jalr t6\n"
+            " 10: 8faa mv t6,a0\n"
+            " 12: 4082 lw ra,0(sp)\n"
+            " 14: 4512 lw a0,4(sp)\n"
+            " 16: 45a2 lw a1,8(sp)\n"
+            " 18: 4632 lw a2,12(sp)\n"
+            " 1a: 0141 addi sp,sp,16"
+        >>),
+        Emitted
+    ).
+
+% Same scenario as call_primitive_0_test/0 for primitive index 1; the listing
+% differs only in its first line (load from offset 4 instead of 0).
+call_primitive_1_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {AfterCall, Reg} = ?BACKEND:call_primitive(Fresh, 1, [ctx, jit_state]),
+    ?assertEqual(t6, Reg),
+    Emitted = ?BACKEND:stream(AfterCall),
+    ?assertEqual(
+        dump_to_bin(<<
+            " 0: 00462f83 lw t6,4(a2)\n"
+            " 4: 1141 addi sp,sp,-16\n"
+            " 6: c006 sw ra,0(sp)\n"
+            " 8: c22a sw a0,4(sp)\n"
+            " a: c42e sw a1,8(sp)\n"
+            " c: c632 sw a2,12(sp)\n"
+            " e: 9f82 jalr t6\n"
+            " 10: 8faa mv t6,a0\n"
+            " 12: 4082 lw ra,0(sp)\n"
+            " 14: 4512 lw a0,4(sp)\n"
+            " 16: 45a2 lw a1,8(sp)\n"
+            " 18: 4632 lw a2,12(sp)\n"
+            " 1a: 0141 addi sp,sp,16"
+        >>),
+        Emitted
+    ).
+
+% Calls primitive index 2 with ctx plus three integer literals (42, 43, 44),
+% expects the result in t6 and checks the emitted code against the listing.
+call_primitive_2_args_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {AfterCall, Reg} = ?BACKEND:call_primitive(Fresh, 2, [ctx, 42, 43, 44]),
+    ?assertEqual(t6, Reg),
+    Emitted = ?BACKEND:stream(AfterCall),
+    ?assertEqual(
+        dump_to_bin(<<
+            " 0: 00862f83 lw t6,8(a2)\n"
+            " 4: 1141 addi sp,sp,-16\n"
+            " 6: c006 sw ra,0(sp)\n"
+            " 8: c22a sw a0,4(sp)\n"
+            " a: c42e sw a1,8(sp)\n"
+            " c: c632 sw a2,12(sp)\n"
+            " e: 02a00593 li a1,42\n"
+            " 12: 02b00613 li a2,43\n"
+            " 16: 02c00693 li a3,44\n"
+            " 1a: 9f82 jalr t6\n"
+            " 1c: 8faa mv t6,a0\n"
+            " 1e: 4082 lw ra,0(sp)\n"
+            " 20: 4512 lw a0,4(sp)\n"
+            " 22: 45a2 lw a1,8(sp)\n"
+            " 24: 4632 lw a2,12(sp)\n"
+            " 26: 0141 addi sp,sp,16"
+        >>),
+        Emitted
+    ).
+
+% Despite its name this exercises call_primitive_last/3: ?PRIM_ALLOCATE with
+% five arguments, checked against the listing.
+call_primitive_5_args_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    AfterCall = ?BACKEND:call_primitive_last(Fresh, ?PRIM_ALLOCATE, [ctx, jit_state, 16, 32, 2]),
+    Emitted = ?BACKEND:stream(AfterCall),
+    ?assertEqual(
+        dump_to_bin(<<
+            " 0: 01462f83 lw t6,20(a2)\n"
+            " 4: 4641 li a2,16\n"
+            " 6: 02000693 li a3,32\n"
+            " a: 4709 li a4,2\n"
+            " c: 8f82 jr t6"
+        >>),
+        Emitted
+    ).
+
+% Calls ?PRIM_BITSTRING_EXTRACT_INTEGER with six arguments, two of which are
+% native registers handed over as {free, Reg}, and checks the emitted code
+% against the listing. The second match on RegA asserts that and_/3 returns
+% the same register it was given.
+call_primitive_6_args_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Get bin_ptr from x_reg 0 (similar to get_list_test pattern)
+    {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    {State2, RegA} = ?BACKEND:and_(State1, {free, RegA}, ?TERM_PRIMARY_CLEAR_MASK),
+    % Get another register for the last parameter to test {free, Reg} handling
+    {State3, OtherReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 1}),
+    % Call PRIM_BITSTRING_EXTRACT_INTEGER with 6 arguments
+    {State4, _ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_BITSTRING_EXTRACT_INTEGER, [
+        ctx, jit_state, {free, RegA}, 64, 8, {free, OtherReg}
+    ]),
+    Stream = ?BACKEND:stream(State4),
+    Dump =
+        <<
+            " 0: 01852f83 lw t6,24(a0)\n"
+            " 4: 4f0d li t5,3\n"
+            " 6: ffff4f13 not t5,t5\n"
+            " a: 01efffb3 and t6,t6,t5\n"
+            " e: 01c52f03 lw t5,28(a0)\n"
+            " 12: 0b800e93 li t4,184\n"
+            " 16: 9eb2 add t4,t4,a2\n"
+            " 18: 000eae83 lw t4,0(t4)\n"
+            " 1c: 1141 addi sp,sp,-16\n"
+            " 1e: c006 sw ra,0(sp)\n"
+            " 20: c22a sw a0,4(sp)\n"
+            " 22: c42e sw a1,8(sp)\n"
+            " 24: c632 sw a2,12(sp)\n"
+            " 26: 867e mv a2,t6\n"
+            " 28: 04000693 li a3,64\n"
+            " 2c: 4721 li a4,8\n"
+            " 2e: 87fa mv a5,t5\n"
+            " 30: 9e82 jalr t4\n"
+            " 32: 8eaa mv t4,a0\n"
+            " 34: 4082 lw ra,0(sp)\n"
+            " 36: 4512 lw a0,4(sp)\n"
+            " 38: 45a2 lw a1,8(sp)\n"
+            " 3a: 4632 lw a2,12(sp)\n"
+            " 3c: 0141 addi sp,sp,16"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+% Makes three ?PRIM_EXTENDED_REGISTER_PTR calls (indices 19, 20, 19), passes
+% the first two results to ?PRIM_PUT_LIST as {free, {ptr, Reg}}, stores the
+% result through the third pointer, then frees the remaining registers and
+% asserts none are left allocated before checking the listing.
+call_primitive_extended_regs_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, RegA} = ?BACKEND:call_primitive(State0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
+    {State2, RegB} = ?BACKEND:call_primitive(State1, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 20]),
+    {State3, RegC} = ?BACKEND:call_primitive(State2, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
+    {State4, ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_PUT_LIST, [
+        ctx, {free, {ptr, RegA}}, {free, {ptr, RegB}}
+    ]),
+    State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {ptr, RegC}),
+    State6 = ?BACKEND:free_native_registers(State5, [ResultReg, {ptr, RegC}]),
+    ?BACKEND:assert_all_native_free(State6),
+    Stream = ?BACKEND:stream(State6),
+    Dump = <<
+        " 0: 04862f83 lw t6,72(a2)\n"
+        " 4: 1141 addi sp,sp,-16\n"
+        " 6: c006 sw ra,0(sp)\n"
+        " 8: c22a sw a0,4(sp)\n"
+        " a: c42e sw a1,8(sp)\n"
+        " c: c632 sw a2,12(sp)\n"
+        " e: 45cd li a1,19\n"
+        " 10: 9f82 jalr t6\n"
+        " 12: 8faa mv t6,a0\n"
+        " 14: 4082 lw ra,0(sp)\n"
+        " 16: 4512 lw a0,4(sp)\n"
+        " 18: 45a2 lw a1,8(sp)\n"
+        " 1a: 4632 lw a2,12(sp)\n"
+        " 1c: 0141 addi sp,sp,16\n"
+        " 1e: 04862f03 lw t5,72(a2)\n"
+        " 22: 1101 addi sp,sp,-32\n"
+        " 24: c006 sw ra,0(sp)\n"
+        " 26: c22a sw a0,4(sp)\n"
+        " 28: c42e sw a1,8(sp)\n"
+        " 2a: c632 sw a2,12(sp)\n"
+        " 2c: c87e sw t6,16(sp)\n"
+        " 2e: 45d1 li a1,20\n"
+        " 30: 9f02 jalr t5\n"
+        " 32: 8f2a mv t5,a0\n"
+        " 34: 4082 lw ra,0(sp)\n"
+        " 36: 4512 lw a0,4(sp)\n"
+        " 38: 45a2 lw a1,8(sp)\n"
+        " 3a: 4632 lw a2,12(sp)\n"
+        " 3c: 4fc2 lw t6,16(sp)\n"
+        " 3e: 02010113 addi sp,sp,32\n"
+        " 42: 04862e83 lw t4,72(a2)\n"
+        " 46: 1101 addi sp,sp,-32\n"
+        " 48: c006 sw ra,0(sp)\n"
+        " 4a: c22a sw a0,4(sp)\n"
+        " 4c: c42e sw a1,8(sp)\n"
+        " 4e: c632 sw a2,12(sp)\n"
+        " 50: c87a sw t5,16(sp)\n"
+        " 52: ca7e sw t6,20(sp)\n"
+        " 54: 45cd li a1,19\n"
+        " 56: 9e82 jalr t4\n"
+        " 58: 8eaa mv t4,a0\n"
+        " 5a: 4082 lw ra,0(sp)\n"
+        " 5c: 4512 lw a0,4(sp)\n"
+        " 5e: 45a2 lw a1,8(sp)\n"
+        " 60: 4632 lw a2,12(sp)\n"
+        " 62: 4f42 lw t5,16(sp)\n"
+        " 64: 4fd2 lw t6,20(sp)\n"
+        " 66: 02010113 addi sp,sp,32\n"
+        " 6a: 03462e03 lw t3,52(a2)\n"
+        " 6e: 1101 addi sp,sp,-32\n"
+        " 70: c006 sw ra,0(sp)\n"
+        " 72: c22a sw a0,4(sp)\n"
+        " 74: c42e sw a1,8(sp)\n"
+        " 76: c632 sw a2,12(sp)\n"
+        " 78: c876 sw t4,16(sp)\n"
+        " 7a: 000fa583 lw a1,0(t6)\n"
+        " 7e: 000f2603 lw a2,0(t5)\n"
+        " 82: 9e02 jalr t3\n"
+        " 84: 8e2a mv t3,a0\n"
+        " 86: 4082 lw ra,0(sp)\n"
+        " 88: 4512 lw a0,4(sp)\n"
+        " 8a: 45a2 lw a1,8(sp)\n"
+        " 8c: 4632 lw a2,12(sp)\n"
+        " 8e: 4ec2 lw t4,16(sp)\n"
+        " 90: 02010113 addi sp,sp,32\n"
+        " 94: 01cea023 sw t3,0(t4)"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+% Loads the literals 1..5 into native registers, asserting by pattern match
+% that they land in t6, t5, t4, t3 and t2 in that order, then calls
+% ?PRIM_BITSTRING_INSERT_INTEGER with those registers as arguments (t3 and t2
+% as {free, Reg}). Frees the rest and asserts none are left allocated.
+call_primitive_few_free_regs_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, t6} = ?BACKEND:move_to_native_register(State0, 1),
+    {State2, t5} = ?BACKEND:move_to_native_register(State1, 2),
+    {State3, t4} = ?BACKEND:move_to_native_register(State2, 3),
+    {State4, t3} = ?BACKEND:move_to_native_register(State3, 4),
+    {State5, t2} = ?BACKEND:move_to_native_register(State4, 5),
+    {State6, ResultReg} = ?BACKEND:call_primitive(State5, ?PRIM_BITSTRING_INSERT_INTEGER, [
+        t5, t6, {free, t3}, t4, {free, t2}
+    ]),
+    State7 = ?BACKEND:free_native_registers(State6, [ResultReg, t5, t6, t4]),
+    ?BACKEND:assert_all_native_free(State7),
+    Stream = ?BACKEND:stream(State7),
+    Dump = <<
+        " 0: 4f85 li t6,1\n"
+        " 2: 4f09 li t5,2\n"
+        " 4: 4e8d li t4,3\n"
+        " 6: 4e11 li t3,4\n"
+        " 8: 4395 li t2,5\n"
+        " a: 0e400313 li t1,228\n"
+        " e: 9332 add t1,t1,a2\n"
+        " 10: 00032303 lw t1,0(t1)\n"
+        " 14: 1101 addi sp,sp,-32\n"
+        " 16: c006 sw ra,0(sp)\n"
+        " 18: c22a sw a0,4(sp)\n"
+        " 1a: c42e sw a1,8(sp)\n"
+        " 1c: c632 sw a2,12(sp)\n"
+        " 1e: c876 sw t4,16(sp)\n"
+        " 20: ca7a sw t5,20(sp)\n"
+        " 22: cc7e sw t6,24(sp)\n"
+        " 24: 857a mv a0,t5\n"
+        " 26: 85fe mv a1,t6\n"
+        " 28: 8672 mv a2,t3\n"
+        " 2a: 86f6 mv a3,t4\n"
+        " 2c: 871e mv a4,t2\n"
+        " 2e: 9302 jalr t1\n"
+        " 30: 832a mv t1,a0\n"
+        " 32: 4082 lw ra,0(sp)\n"
+        " 34: 4512 lw a0,4(sp)\n"
+        " 36: 45a2 lw a1,8(sp)\n"
+        " 38: 4632 lw a2,12(sp)\n"
+        " 3a: 4ec2 lw t4,16(sp)\n"
+        " 3c: 4f52 lw t5,20(sp)\n"
+        " 3e: 4fe2 lw t6,24(sp)\n"
+        " 40: 02010113 addi sp,sp,32"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+% Emits decrement_reductions_and_maybe_schedule_next/1 followed by a tail call
+% to ?PRIM_CALL_EXT whose last argument is -1, and checks the listing.
+call_ext_only_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
+    State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1]),
+    Stream = ?BACKEND:stream(State2),
+    Dump = <<
+        " 0: 0085af83 lw t6,8(a1)\n"
+        " 4: 1ffd addi t6,t6,-1\n"
+        " 6: 01f5a423 sw t6,8(a1)\n"
+        " a: 000f9b63 bnez t6,0x20\n"
+        " e: 00000f97 auipc t6,0x0\n"
+        " 12: 0fc9 addi t6,t6,18 # 0x20\n"
+        " 14: 0001 nop\n"
+        " 16: 01f5a223 sw t6,4(a1)\n"
+        " 1a: 00862f83 lw t6,8(a2)\n"
+        " 1e: 8f82 jr t6\n"
+        " 20: 01062f83 lw t6,16(a2)\n"
+        " 24: 02400613 li a2,36\n"
+        " 28: 4689 li a3,2\n"
+        " 2a: 4709 li a4,2\n"
+        " 2c: 57fd li a5,-1\n"
+        " 2e: 8f82 jr t6"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+% Tail-calls ?PRIM_RAISE_ERROR_TUPLE with five arguments, the last being a
+% native register loaded from {x_reg, 0} and passed as {free, RegA}.
+call_primitive_last_5_args_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [
+        ctx, jit_state, offset, ?CASE_CLAUSE_ATOM, {free, RegA}
+    ]),
+    Stream = ?BACKEND:stream(State2),
+    Dump = <<
+        " 0: 01852f83 lw t6,24(a0)\n"
+        " 4: 04c62f03 lw t5,76(a2)\n"
+        " 8: 4621 li a2,8\n"
+        " a: 2cb00693 li a3,715\n"
+        " e: 877e mv a4,t6\n"
+        " 10: 8f02 jr t5"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+% Emits decrement_reductions_and_maybe_schedule_next/1 followed by a tail call
+% to ?PRIM_CALL_EXT whose last argument is 10, and checks the listing.
+call_ext_last_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Scheduled = ?BACKEND:decrement_reductions_and_maybe_schedule_next(Fresh),
+    AfterCall = ?BACKEND:call_primitive_last(
+        Scheduled, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, 10]
+    ),
+    Emitted = ?BACKEND:stream(AfterCall),
+    ?assertEqual(
+        dump_to_bin(<<
+            " 0: 0085af83 lw t6,8(a1)\n"
+            " 4: 1ffd addi t6,t6,-1\n"
+            " 6: 01f5a423 sw t6,8(a1)\n"
+            " a: 000f9b63 bnez t6,0x20\n"
+            " e: 00000f97 auipc t6,0x0\n"
+            " 12: 0fc9 addi t6,t6,18 # 0x20\n"
+            " 14: 0001 nop\n"
+            " 16: 01f5a223 sw t6,4(a1)\n"
+            " 1a: 00862f83 lw t6,8(a2)\n"
+            " 1e: 8f82 jr t6\n"
+            " 20: 01062f83 lw t6,16(a2)\n"
+            " 24: 02400613 li a2,36\n"
+            " 28: 4689 li a3,2\n"
+            " 2a: 4709 li a4,2\n"
+            " 2c: 47a9 li a5,10\n"
+            " 2e: 8f82 jr t6"
+        >>),
+        Emitted
+    ).
+
+% Tail-calls primitive index 0 with [ctx, jit_state, 42] and checks the listing.
+call_primitive_last_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    AfterCall = ?BACKEND:call_primitive_last(Fresh, 0, [ctx, jit_state, 42]),
+    Emitted = ?BACKEND:stream(AfterCall),
+    ?assertEqual(
+        dump_to_bin(<<
+            " 0: 00062f83 lw t6,0(a2)\n"
+            " 4: 02a00613 li a2,42\n"
+            " 8: 8f82 jr t6"
+        >>),
+        Emitted
+    ).
+
+% EUnit setup fixture: the setup fun builds a fresh PIC-variant backend state
+% that both cases start from. Each case calls ?PRIM_PROCESS_SIGNAL_MESSAGES and
+% then return_if_not_equal_to_ctx/2 with a {free, Reg} argument - first on the
+% result register itself (t6), then on a copy of it (t5).
+return_if_not_equal_to_ctx_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                % Case 1: compare the primitive's result register directly.
+                ?_test(begin
+                    {State1, ResultReg} = ?BACKEND:call_primitive(
+                        State0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
+                            ctx, jit_state
+                        ]
+                    ),
+                    ?assertEqual(t6, ResultReg),
+                    State2 = ?BACKEND:return_if_not_equal_to_ctx(State1, {free, ResultReg}),
+                    Stream = ?BACKEND:stream(State2),
+                    Dump =
+                        <<
+                            " 0: 05462f83 lw t6,84(a2)\n"
+                            " 4: 1141 addi sp,sp,-16\n"
+                            " 6: c006 sw ra,0(sp)\n"
+                            " 8: c22a sw a0,4(sp)\n"
+                            " a: c42e sw a1,8(sp)\n"
+                            " c: c632 sw a2,12(sp)\n"
+                            " e: 9f82 jalr t6\n"
+                            " 10: 8faa mv t6,a0\n"
+                            " 12: 4082 lw ra,0(sp)\n"
+                            " 14: 4512 lw a0,4(sp)\n"
+                            " 16: 45a2 lw a1,8(sp)\n"
+                            " 18: 4632 lw a2,12(sp)\n"
+                            " 1a: 0141 addi sp,sp,16\n"
+                            " 1c: 00af8463 beq t6,a0,0x24\n"
+                            " 20: 857e mv a0,t6\n"
+                            " 22: 8082 ret"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                % Case 2: copy the result to another register (expected t5) and
+                % compare the copy instead.
+                ?_test(begin
+                    {State1, ResultReg} = ?BACKEND:call_primitive(
+                        State0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
+                            ctx, jit_state
+                        ]
+                    ),
+                    ?assertEqual(t6, ResultReg),
+                    {State2, OtherReg} = ?BACKEND:copy_to_native_register(State1, ResultReg),
+                    ?assertEqual(t5, OtherReg),
+                    State3 = ?BACKEND:return_if_not_equal_to_ctx(State2, {free, OtherReg}),
+                    Stream = ?BACKEND:stream(State3),
+                    Dump =
+                        <<
+                            " 0: 05462f83 lw t6,84(a2)\n"
+                            " 4: 1141 addi sp,sp,-16\n"
+                            " 6: c006 sw ra,0(sp)\n"
+                            " 8: c22a sw a0,4(sp)\n"
+                            " a: c42e sw a1,8(sp)\n"
+                            " c: c632 sw a2,12(sp)\n"
+                            " e: 9f82 jalr t6\n"
+                            " 10: 8faa mv t6,a0\n"
+                            " 12: 4082 lw ra,0(sp)\n"
+                            " 14: 4512 lw a0,4(sp)\n"
+                            " 16: 45a2 lw a1,8(sp)\n"
+                            " 18: 4632 lw a2,12(sp)\n"
+                            " 1a: 0141 addi sp,sp,16\n"
+                            " 1c: 8f7e mv t5,t6\n"
+                            " 1e: 00af0463 beq t5,a0,0x26\n"
+                            " 22: 857a mv a0,t5\n"
+                            " 24: 8082 ret"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end)
+            ]
+        end}.
+
+%% move_to_cp/2 from {y_reg, 0}: the expected code loads the word at 20(a0),
+%% loads through that pointer, and stores the result at 92(a0).
+move_to_cp_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Moved = ?BACKEND:move_to_cp(Fresh, {y_reg, 0}),
+    Expected =
+        <<
+            " 0: 01452f03 lw t5,20(a0)\n"
+            " 4: 000f2f83 lw t6,0(t5)\n"
+            " 8: 05f52e23 sw t6,92(a0)"
+        >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Moved)).
+
+%% increment_sp/2 by 7: the expected code adds 28 to the word held at 20(a0).
+increment_sp_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Bumped = ?BACKEND:increment_sp(Fresh, 7),
+    Expected =
+        <<
+            " 0: 01452f83 lw t6,20(a0)\n"
+            " 4: 0ff1 addi t6,t6,28\n"
+            " 6: 01f52a23 sw t6,20(a0)"
+        >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Bumped)).
+
+%% if_block/3 over a range of conditions.
+%% The fixture loads {x_reg, 0} into RegA and {x_reg, 1} into RegB; every case
+%% starts from that state, so each expected dump begins with the same two loads.
+%% Cases check the emitted code and, where asserted, which registers are still
+%% reported by used_regs/1 afterwards ({free, RegA} conditions expect only RegB).
+if_block_test_() ->
+    {setup,
+        fun() ->
+            Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            {WithA, RegA} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+            {WithAB, RegB} = ?BACKEND:move_to_native_register(WithA, {x_reg, 1}),
+            {WithAB, RegA, RegB}
+        end,
+        fun({Base, RegA, RegB}) ->
+            % Block body shared by all cases but one: RegB += 2.
+            AddTwo = fun(Blk) -> ?BACKEND:add(Blk, RegB, 2) end,
+            [
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {RegA, '<', 0}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 000fd363 bgez t6,0xe\n"
+                        " c: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {RegA, '<', RegB}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 01efd363 bge t6,t5,0xe\n"
+                        " c: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {RegA, '<', 42}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 02a00e93 li t4,42\n"
+                        " c: 01dfd363 bge t6,t4,0x12\n"
+                        " 10: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {RegA, '<', 1024}, AddTwo),
+                    % A trailing jump is emitted after the block; used_regs/1 is
+                    % still checked on the state returned by if_block/3.
+                    Jumped = ?BACKEND:jump_to_offset(After, 16#100),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 40000e93 li t4,1024\n"
+                        " c: 01dfd363 bge t6,t4,0x12\n"
+                        " 10: 0f09 addi t5,t5,2\n"
+                        " 12: a0fd j 0x100"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Jumped)),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {RegA, '==', 0}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 000f9363 bnez t6,0xe\n"
+                        " c: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {{free, RegA}, '==', 0}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 000f9363 bnez t6,0xe\n"
+                        " c: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {RegA, '==', -1}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 5efd li t4,-1\n"
+                        " a: 01df9363 bne t6,t4,0x10\n"
+                        " e: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {'(int)', RegA, '==', 0}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 000f9363 bnez t6,0xe\n"
+                        " c: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {'(int)', {free, RegA}, '==', 0}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 000f9363 bnez t6,0xe\n"
+                        " c: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {RegA, '!=', ?TERM_NIL}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 03b00e93 li t4,59\n"
+                        " c: 01df8363 beq t6,t4,0x12\n"
+                        " 10: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {{free, RegA}, '!=', ?TERM_NIL}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 03b00e93 li t4,59\n"
+                        " c: 01df8363 beq t6,t4,0x12\n"
+                        " 10: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {'(int)', RegA, '!=', 42}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 02a00e93 li t4,42\n"
+                        " c: 01df8363 beq t6,t4,0x12\n"
+                        " 10: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    % Large immediate (1995): the expected code materializes it in a
+                    % temporary register (t4). This case adds 1 instead of 2 and
+                    % only checks the emitted code.
+                    After = ?BACKEND:if_block(
+                        Base,
+                        {RegA, '!=', 1995},
+                        fun(Blk) -> ?BACKEND:add(Blk, RegB, 1) end
+                    ),
+                    Jumped = ?BACKEND:jump_to_offset(After, 16#100),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 7cb00e93 li t4,1995\n"
+                        " c: 01df8363 beq t6,t4,0x12\n"
+                        " 10: 0f05 addi t5,t5,1\n"
+                        " 12: a0fd j 0x100"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Jumped))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {'(int)', {free, RegA}, '!=', 42}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 02a00e93 li t4,42\n"
+                        " c: 01df8363 beq t6,t4,0x12\n"
+                        " 10: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {RegA, '==', ?TERM_NIL}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 03b00e93 li t4,59\n"
+                        " c: 01df9363 bne t6,t4,0x12\n"
+                        " 10: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {{free, RegA}, '==', ?TERM_NIL}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 03b00e93 li t4,59\n"
+                        " c: 01df9363 bne t6,t4,0x12\n"
+                        " 10: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {'(int)', RegA, '==', 42}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 02a00e93 li t4,42\n"
+                        " c: 01df9363 bne t6,t4,0x12\n"
+                        " 10: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {'(int)', {free, RegA}, '==', 42}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 02a00e93 li t4,42\n"
+                        " c: 01df9363 bne t6,t4,0x12\n"
+                        " 10: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {'(bool)', RegA, '==', false}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 01ff9e93 slli t4,t6,0x1f\n"
+                        " c: 000ec363 bltz t4,0x12\n"
+                        " 10: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {'(bool)', {free, RegA}, '==', false}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 01ff9e93 slli t4,t6,0x1f\n"
+                        " c: 000ec363 bltz t4,0x12\n"
+                        " 10: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {'(bool)', RegA, '!=', false}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 01ff9e93 slli t4,t6,0x1f\n"
+                        " c: 000ed363 bgez t4,0x12\n"
+                        " 10: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {'(bool)', {free, RegA}, '!=', false}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 01ff9e93 slli t4,t6,0x1f\n"
+                        " c: 000ed363 bgez t4,0x12\n"
+                        " 10: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {RegA, '&', 16#7, '!=', 0}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 007ffe93 andi t4,t6,7\n"
+                        " c: 000e8363 beqz t4,0x12\n"
+                        " 10: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {RegA, '&', 16#5, '!=', 0}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 005ffe93 andi t4,t6,5\n"
+                        " c: 000e8363 beqz t4,0x12\n"
+                        " 10: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {{free, RegA}, '&', 16#7, '!=', 0}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 007ffe93 andi t4,t6,7\n"
+                        " c: 000e8363 beqz t4,0x12\n"
+                        " 10: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(
+                        Base,
+                        {RegA, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
+                        AddTwo
+                    ),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: ffffce93 not t4,t6\n"
+                        " c: 0ef2 slli t4,t4,0x1c\n"
+                        " e: 000e8363 beqz t4,0x14\n"
+                        " 12: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    % With {free, RegA} the expected code works in t6 itself
+                    % instead of copying into t4.
+                    After = ?BACKEND:if_block(
+                        Base,
+                        {{free, RegA}, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
+                        AddTwo
+                    ),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: ffffcf93 not t6,t6\n"
+                        " c: 0ff2 slli t6,t6,0x1c\n"
+                        " e: 000f8363 beqz t6,0x14\n"
+                        " 12: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(
+                        Base,
+                        {RegA, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
+                        AddTwo
+                    ),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 8efe mv t4,t6\n"
+                        " a: 03f00e13 li t3,63\n"
+                        " e: 01cefeb3 and t4,t4,t3\n"
+                        " 12: 4e21 li t3,8\n"
+                        " 14: 01ce8363 beq t4,t3,0x1a\n"
+                        " 18: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {{free, RegA}, '<', RegB}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 01efd363 bge t6,t5,0xe\n"
+                        " c: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(After))
+                end),
+                ?_test(begin
+                    After = ?BACKEND:if_block(
+                        Base,
+                        {
+                            {free, RegA},
+                            '&',
+                            ?TERM_BOXED_TAG_MASK,
+                            '!=',
+                            ?TERM_BOXED_POSITIVE_INTEGER
+                        },
+                        AddTwo
+                    ),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 03f00e93 li t4,63\n"
+                        " c: 01dfffb3 and t6,t6,t4\n"
+                        " 10: 4ea1 li t4,8\n"
+                        " 12: 01df8363 beq t6,t4,0x18\n"
+                        " 16: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(After))
+                end),
+                %% Test {RegA, '&', 16#3, '!=', 0} using ANDI instruction
+                ?_test(begin
+                    After = ?BACKEND:if_block(Base, {RegA, '&', 16#3, '!=', 0}, AddTwo),
+                    Expected = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 003ffe93 andi t4,t6,3\n"
+                        " c: 000e8363 beqz t4,0x12\n"
+                        " 10: 0f09 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(After)),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(After))
+                end)
+            ]
+        end}.
+
+%% if_else_block/4: RegB gets +2 in the "then" branch and +4 in the "else"
+%% branch; the expected code has the then-branch jump over the else-branch.
+if_else_block_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {WithA, RegA} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    {WithAB, RegB} = ?BACKEND:move_to_native_register(WithA, {x_reg, 1}),
+    Then = fun(Blk) -> ?BACKEND:add(Blk, RegB, 2) end,
+    Else = fun(Blk) -> ?BACKEND:add(Blk, RegB, 4) end,
+    Branched = ?BACKEND:if_else_block(WithAB, {RegA, '==', ?TERM_NIL}, Then, Else),
+    Expected =
+        <<
+            "0: 01852f83 lw t6,24(a0)\n"
+            "4: 01c52f03 lw t5,28(a0)\n"
+            "8: 03b00e93 li t4,59\n"
+            "c: 01df9463 bne t6,t4,0x14\n"
+            "10: 0f09 addi t5,t5,2\n"
+            "12: a011 j 0x16\n"
+            "14: 0f11 addi t5,t5,4"
+        >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Branched)).
+
+%% shift_right/3 in both calling conventions: with {free, Reg} the same
+%% register must come back; with a plain Reg a different one must be returned.
+shift_right_test_() ->
+    [
+        ?_test(begin
+            Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            {Loaded, Reg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+            % Matching on the already-bound Reg asserts the shift is in place.
+            {Shifted, Reg} = ?BACKEND:shift_right(Loaded, {free, Reg}, 3),
+            Expected =
+                <<
+                    " 0: 01852f83 lw t6,24(a0)\n"
+                    " 4: 003fdf93 srli t6,t6,0x3"
+                >>,
+            ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Shifted))
+        end),
+        ?_test(begin
+            Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            {Loaded, Reg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+            {Shifted, ResultReg} = ?BACKEND:shift_right(Loaded, Reg, 3),
+            ?assertNotEqual(ResultReg, Reg),
+            Expected =
+                <<
+                    " 0: 01852f83 lw t6,24(a0)\n"
+                    " 4: 003fdf13 srli t5,t6,0x3"
+                >>,
+            ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Shifted))
+        end)
+    ].
+
+%% shift_left/3 returns only the new state; the expected code shifts the
+%% loaded register in place.
+shift_left_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Reg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Shifted = ?BACKEND:shift_left(Loaded, Reg, 3),
+    Expected =
+        <<
+            "0: 01852f83 lw t6,24(a0)\n"
+            "4: 0f8e slli t6,t6,0x3"
+        >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Shifted)).
+
+%% Builds a jump table plus three labelled bodies and checks the stream after
+%% update_branches/1: in the expected dump the jump table entries and the
+%% forward references to label 2 all point at the final label offsets.
+call_only_or_schedule_next_and_label_relocation_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Emission steps, applied in order to the backend state.
+    Steps = [
+        fun(S) -> ?BACKEND:jump_table(S, 2) end,
+        fun(S) -> ?BACKEND:add_label(S, 1) end,
+        fun(S) -> ?BACKEND:call_only_or_schedule_next(S, 2) end,
+        fun(S) -> ?BACKEND:add_label(S, 2) end,
+        fun(S) -> ?BACKEND:call_primitive_last(S, 0, [ctx, jit_state]) end,
+        % OP_INT_CALL_END
+        fun(S) -> ?BACKEND:add_label(S, 0) end,
+        fun(S) -> ?BACKEND:call_primitive_last(S, 1, [ctx, jit_state]) end,
+        fun(S) -> ?BACKEND:update_branches(S) end
+    ],
+    Final = lists:foldl(fun(Step, S) -> Step(S) end, Fresh, Steps),
+    Expected =
+        <<
+            " 0: 00000697 auipc a3,0x0\n"
+            " 4: 04668067 jr 70(a3) # 0x46\n"
+            " 8: 00000697 auipc a3,0x0\n"
+            " c: 01068067 jr 16(a3) # 0x18\n"
+            " 10: 00000697 auipc a3,0x0\n"
+            " 14: 03068067 jr 48(a3) # 0x40\n"
+            " 18: 0085af83 lw t6,8(a1)\n"
+            " 1c: 1ffd addi t6,t6,-1\n"
+            " 1e: 01f5a423 sw t6,8(a1)\n"
+            " 22: 000f8663 beqz t6,0x2e\n"
+            " 26: a829 j 0x40\n"
+            " 28: 0001 nop\n"
+            " 2a: 00000013 nop\n"
+            " 2e: 00000f97 auipc t6,0x0\n"
+            " 32: 0fc9 addi t6,t6,18 # 0x40\n"
+            " 34: 0001 nop\n"
+            " 36: 01f5a223 sw t6,4(a1)\n"
+            " 3a: 00862f83 lw t6,8(a2)\n"
+            " 3e: 8f82 jr t6\n"
+            " 40: 00062f83 lw t6,0(a2)\n"
+            " 44: 8f82 jr t6\n"
+            " 46: 00462f83 lw t6,4(a2)\n"
+            " 4a: 8f82 jr t6"
+        >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Final)).
+
+%% Test with large gap (256+ bytes) to force mov_immediate path
+%% Same sequence as the label relocation test, but 128 register loads are
+%% emitted between the jump table and label 1; only the code after that
+%% padding is compared.
+call_only_or_schedule_next_and_label_relocation_large_gap_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    WithTable = ?BACKEND:jump_table(Fresh, 2),
+    % Padding: 128 loads of {x_reg, 2} into a3. Per the size accounting below
+    % each load is a 2-byte compressed instruction, i.e. 256 bytes in total.
+    LoadIntoA3 = fun(_Index, Acc) ->
+        ?BACKEND:move_to_native_register(Acc, {x_reg, 2}, a3)
+    end,
+    Padded = lists:foldl(LoadIntoA3, WithTable, lists:seq(1, 128)),
+    AtLabel1 = ?BACKEND:add_label(Padded, 1),
+    Scheduled = ?BACKEND:call_only_or_schedule_next(AtLabel1, 2),
+    AtLabel2 = ?BACKEND:add_label(Scheduled, 2),
+    Body2 = ?BACKEND:call_primitive_last(AtLabel2, 0, [ctx, jit_state]),
+    % OP_INT_CALL_END
+    AtLabel0 = ?BACKEND:add_label(Body2, 0),
+    Body0 = ?BACKEND:call_primitive_last(AtLabel0, 1, [ctx, jit_state]),
+    Resolved = ?BACKEND:update_branches(Body0),
+    % Skip the jump table (3 entries x 8 bytes = 24) and the padding
+    % (128 x 2 bytes = 256): the section under test starts at 24 + 256 = 0x118.
+    % Offsets in the expected dump are relative to that point.
+    {_Prefix, Tail} = split_binary(?BACKEND:stream(Resolved), 16#118),
+    Expected = <<
+        " 0: 0085af83 lw t6,8(a1)\n"
+        " 4: 1ffd addi t6,t6,-1\n"
+        " 6: 01f5a423 sw t6,8(a1)\n"
+        " a: 000f8663 beqz t6,0x16\n"
+        " e: a829 j 0x28\n"
+        " 10: 0001 nop\n"
+        " 12: 00000013 nop\n"
+        " 16: 00000f97 auipc t6,0x0\n"
+        " 1a: 0fc9 addi t6,t6,18 # 0x28\n"
+        " 1c: 0001 nop\n"
+        " 1e: 01f5a223 sw t6,4(a1)\n"
+        " 22: 00862f83 lw t6,8(a2)\n"
+        " 26: 8f82 jr t6\n"
+        " 28: 00062f83 lw t6,0(a2)\n"
+        " 2c: 8f82 jr t6\n"
+        " 2e: 00462f83 lw t6,4(a2)\n"
+        " 32: 8f82 jr t6"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Tail).
+
+%% Three chained calls: primitive 8 yields a function pointer, primitive 15 is
+%% called with the literal 998238357 (0x3b7fe895, loaded with lui + addi in
+%% the expected code), and the pointer is then called with that result as its
+%% last argument. A zero result tail-calls ?PRIM_HANDLE_ERROR; otherwise the
+%% result is stored to {x_reg, 0}.
+call_bif_with_large_literal_integer_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {GotPtr, FuncPtr} = ?BACKEND:call_primitive(Fresh, 8, [jit_state, 2]),
+    {GotArg, ArgReg} = ?BACKEND:call_primitive(GotPtr, 15, [ctx, 998238357]),
+    CallArgs = [ctx, 0, 1, {free, {x_reg, 0}}, {free, ArgReg}],
+    {Called, ResultReg} = ?BACKEND:call_func_ptr(GotArg, {free, FuncPtr}, CallArgs),
+    OnZero = fun(Blk) ->
+        ?BACKEND:call_primitive_last(Blk, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset])
+    end,
+    Checked = ?BACKEND:if_block(Called, {ResultReg, '==', 0}, OnZero),
+    Stored = ?BACKEND:move_to_vm_register(Checked, ResultReg, {x_reg, 0}),
+    Final = ?BACKEND:free_native_registers(Stored, [ResultReg]),
+    ?BACKEND:assert_all_native_free(Final),
+    Expected =
+        <<
+            " 0: 02062f83 lw t6,32(a2)\n"
+            " 4: 1141 addi sp,sp,-16\n"
+            " 6: c006 sw ra,0(sp)\n"
+            " 8: c22a sw a0,4(sp)\n"
+            " a: c42e sw a1,8(sp)\n"
+            " c: c632 sw a2,12(sp)\n"
+            " e: 852e mv a0,a1\n"
+            " 10: 4589 li a1,2\n"
+            " 12: 9f82 jalr t6\n"
+            " 14: 8faa mv t6,a0\n"
+            " 16: 4082 lw ra,0(sp)\n"
+            " 18: 4512 lw a0,4(sp)\n"
+            " 1a: 45a2 lw a1,8(sp)\n"
+            " 1c: 4632 lw a2,12(sp)\n"
+            " 1e: 0141 addi sp,sp,16\n"
+            " 20: 03c62f03 lw t5,60(a2)\n"
+            " 24: 1101 addi sp,sp,-32\n"
+            " 26: c006 sw ra,0(sp)\n"
+            " 28: c22a sw a0,4(sp)\n"
+            " 2a: c42e sw a1,8(sp)\n"
+            " 2c: c632 sw a2,12(sp)\n"
+            " 2e: c87e sw t6,16(sp)\n"
+            " 30: 3b7ff5b7 lui a1,0x3b7ff\n"
+            " 34: 89558593 addi a1,a1,-1899 # 0x3b7fe895\n"
+            " 38: 9f02 jalr t5\n"
+            " 3a: 8f2a mv t5,a0\n"
+            " 3c: 4082 lw ra,0(sp)\n"
+            " 3e: 4512 lw a0,4(sp)\n"
+            " 40: 45a2 lw a1,8(sp)\n"
+            " 42: 4632 lw a2,12(sp)\n"
+            " 44: 4fc2 lw t6,16(sp)\n"
+            " 46: 02010113 addi sp,sp,32\n"
+            " 4a: 1141 addi sp,sp,-16\n"
+            " 4c: c006 sw ra,0(sp)\n"
+            " 4e: c22a sw a0,4(sp)\n"
+            " 50: c42e sw a1,8(sp)\n"
+            " 52: c632 sw a2,12(sp)\n"
+            " 54: 4581 li a1,0\n"
+            " 56: 4605 li a2,1\n"
+            " 58: 4d14 lw a3,24(a0)\n"
+            " 5a: 877a mv a4,t5\n"
+            " 5c: 9f82 jalr t6\n"
+            " 5e: 8faa mv t6,a0\n"
+            " 60: 4082 lw ra,0(sp)\n"
+            " 62: 4512 lw a0,4(sp)\n"
+            " 64: 45a2 lw a1,8(sp)\n"
+            " 66: 4632 lw a2,12(sp)\n"
+            " 68: 0141 addi sp,sp,16\n"
+            " 6a: 000f9763 bnez t6,0x78\n"
+            " 6e: 01862f83 lw t6,24(a2)\n"
+            " 72: 07200613 li a2,114\n"
+            " 76: 8f82 jr t6\n"
+            " 78: 01f52c23 sw t6,24(a0)"
+        >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Final)).
+
+%% Masks {x_reg, 0} with ?TERM_PRIMARY_CLEAR_MASK in place, then copies array
+%% elements 1 and 0 of the resulting pointer into {y_reg, 1} and {y_reg, 0}.
+get_list_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Reg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    % Matching on the bound Reg asserts and_/3 reuses the freed register.
+    {Masked, Reg} = ?BACKEND:and_(Loaded, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
+    TailStored = ?BACKEND:move_array_element(Masked, Reg, 1, {y_reg, 1}),
+    HeadStored = ?BACKEND:move_array_element(TailStored, Reg, 0, {y_reg, 0}),
+    Final = ?BACKEND:free_native_registers(HeadStored, [Reg]),
+    ?BACKEND:assert_all_native_free(Final),
+    Expected =
+        <<
+            "0: 01852f83 lw t6,24(a0)\n"
+            "4: 4f0d li t5,3\n"
+            "6: ffff4f13 not t5,t5\n"
+            "a: 01efffb3 and t6,t6,t5\n"
+            "e: 004fae83 lw t4,4(t6)\n"
+            "12: 01452f03 lw t5,20(a0)\n"
+            "16: 01df2223 sw t4,4(t5)\n"
+            "1a: 000fae83 lw t4,0(t6)\n"
+            "1e: 01452f03 lw t5,20(a0)\n"
+            "22: 01df2023 sw t4,0(t5)"
+        >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Final)).
+
+%% Nested if_block/3 sequence: a value that does not carry the integer tag
+%% must be boxed and its first word must carry the boxed-positive-integer
+%% tag; otherwise the code jumps to Label. Label is placed at 0x100 only after
+%% the code is emitted, so the jumps in the expected dump are followed by nops.
+is_integer_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Label = 1,
+    Source = {x_reg, 0},
+    {Loaded, Reg} = ?BACKEND:move_to_native_register(Fresh, Source),
+    % Both failure paths do the same thing: jump to Label.
+    JumpToLabel = fun(Blk) -> ?BACKEND:jump_to_label(Blk, Label) end,
+    NotSmallInt = fun(Outer0) ->
+        Outer1 = ?BACKEND:if_block(
+            Outer0, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, JumpToLabel
+        ),
+        {Outer2, Reg} = ?BACKEND:and_(Outer1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
+        Outer3 = ?BACKEND:move_array_element(Outer2, Reg, 0, Reg),
+        ?BACKEND:if_block(
+            Outer3,
+            {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
+            JumpToLabel
+        )
+    end,
+    Checked = ?BACKEND:if_block(
+        Loaded, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, NotSmallInt
+    ),
+    Freed = ?BACKEND:free_native_registers(Checked, [Reg]),
+    ?BACKEND:assert_all_native_free(Freed),
+    Labelled = ?BACKEND:add_label(Freed, Label, 16#100),
+    Resolved = ?BACKEND:update_branches(Labelled),
+    Expected =
+        <<
+            " 0: 01852f83 lw t6,24(a0)\n"
+            " 4: ffffcf13 not t5,t6\n"
+            " 8: 0f72 slli t5,t5,0x1c\n"
+            " a: 020f0f63 beqz t5,0x48\n"
+            " e: 8f7e mv t5,t6\n"
+            " 10: 4e8d li t4,3\n"
+            " 12: 01df7f33 and t5,t5,t4\n"
+            " 16: 4e89 li t4,2\n"
+            " 18: 01df0663 beq t5,t4,0x24\n"
+            " 1c: a0d5 j 0x100\n"
+            " 1e: 0001 nop\n"
+            " 20: 00000013 nop\n"
+            " 24: 4f0d li t5,3\n"
+            " 26: ffff4f13 not t5,t5\n"
+            " 2a: 01efffb3 and t6,t6,t5\n"
+            " 2e: 000faf83 lw t6,0(t6)\n"
+            " 32: 03f00f13 li t5,63\n"
+            " 36: 01efffb3 and t6,t6,t5\n"
+            " 3a: 4f21 li t5,8\n"
+            " 3c: 01ef8663 beq t6,t5,0x48\n"
+            " 40: a0c1 j 0x100\n"
+            " 42: 0001 nop\n"
+            " 44: 00000013 nop"
+        >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Resolved)).
+
+%% Emits "if Cond then jump to Label" through backend module MMod and returns
+%% the resulting state.
+cond_jump_to_label(Cond, Label, MMod, MSt0) ->
+    JumpOut = fun(Blk) -> MMod:jump_to_label(Blk, Label) end,
+    MMod:if_block(MSt0, Cond, JumpOut).
+
+%% Keep the unoptimized version to test the 'and' case.
+is_number_test() ->
+    % Same shape as the integer check, but the last test is an {'and', [...]}
+    % of two boxed-tag comparisons (positive integer, float). The first
+    % operand keeps Reg; the second one frees it.
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Label = 1,
+    Source = {x_reg, 0},
+    {Loaded, Reg} = ?BACKEND:move_to_native_register(Fresh, Source),
+    NeitherIntNorFloat =
+        {'and', [
+            {Reg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
+            {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FLOAT}
+        ]},
+    NotSmallInt = fun(Blk0) ->
+        Blk1 = cond_jump_to_label(
+            {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, Blk0
+        ),
+        {Blk2, Reg} = ?BACKEND:and_(Blk1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK),
+        Blk3 = ?BACKEND:move_array_element(Blk2, Reg, 0, Reg),
+        cond_jump_to_label(NeitherIntNorFloat, Label, ?BACKEND, Blk3)
+    end,
+    Checked = ?BACKEND:if_block(
+        Loaded, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, NotSmallInt
+    ),
+    Freed = ?BACKEND:free_native_registers(Checked, [Reg]),
+    ?BACKEND:assert_all_native_free(Freed),
+    Labelled = ?BACKEND:add_label(Freed, Label, 16#100),
+    Resolved = ?BACKEND:update_branches(Labelled),
+    Expected =
+        <<
+            " 0: 01852f83 lw t6,24(a0)\n"
+            " 4: ffffcf13 not t5,t6\n"
+            " 8: 0f72 slli t5,t5,0x1c\n"
+            " a: 040f0763 beqz t5,0x58\n"
+            " e: 8f7e mv t5,t6\n"
+            " 10: 4e8d li t4,3\n"
+            " 12: 01df7f33 and t5,t5,t4\n"
+            " 16: 4e89 li t4,2\n"
+            " 18: 01df0663 beq t5,t4,0x24\n"
+            " 1c: a0d5 j 0x100\n"
+            " 1e: 0001 nop\n"
+            " 20: 00000013 nop\n"
+            " 24: 4f0d li t5,3\n"
+            " 26: ffff4f13 not t5,t5\n"
+            " 2a: 01efffb3 and t6,t6,t5\n"
+            " 2e: 000faf83 lw t6,0(t6)\n"
+            " 32: 8f7e mv t5,t6\n"
+            " 34: 03f00e93 li t4,63\n"
+            " 38: 01df7f33 and t5,t5,t4\n"
+            " 3c: 4ea1 li t4,8\n"
+            " 3e: 01df0d63 beq t5,t4,0x58\n"
+            " 42: 03f00f13 li t5,63\n"
+            " 46: 01efffb3 and t6,t6,t5\n"
+            " 4a: 4f61 li t5,24\n"
+            " 4c: 01ef8663 beq t6,t5,0x58\n"
+            " 50: a845 j 0x100\n"
+            " 52: 0001 nop\n"
+            " 54: 00000013 nop"
+        >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Resolved)).
+
+%% Jumps to Label unless {x_reg, 0} equals ?TRUE_ATOM or ?FALSE_ATOM.
+%% Label is placed at 0x100 after emission; the expected jump is a 2-byte
+%% `j` followed by nops.
+is_boolean_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Label = 1,
+    {Loaded, Reg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    JumpToLabel = fun(Inner) -> ?BACKEND:jump_to_label(Inner, Label) end,
+    NotTrue = fun(Outer) -> ?BACKEND:if_block(Outer, {Reg, '!=', ?FALSE_ATOM}, JumpToLabel) end,
+    Checked = ?BACKEND:if_block(Loaded, {Reg, '!=', ?TRUE_ATOM}, NotTrue),
+    Freed = ?BACKEND:free_native_registers(Checked, [Reg]),
+    ?BACKEND:assert_all_native_free(Freed),
+    Labelled = ?BACKEND:add_label(Freed, Label, 16#100),
+    Resolved = ?BACKEND:update_branches(Labelled),
+    Expected = <<
+        " 0: 01852f83 lw t6,24(a0)\n"
+        " 4: 04b00f13 li t5,75\n"
+        " 8: 01ef8963 beq t6,t5,0x1a\n"
+        " c: 4f2d li t5,11\n"
+        " e: 01ef8663 beq t6,t5,0x1a\n"
+        " 12: a0fd j 0x100\n"
+        " 14: 0001 nop\n"
+        " 16: 00000013 nop"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Resolved)).
+
+%% Same check with Label placed at 0x1000: the expected jump is the 4-byte
+%% `j` form followed by a single nop.
+is_boolean_far_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Label = 1,
+    {Loaded, Reg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    JumpToLabel = fun(Inner) -> ?BACKEND:jump_to_label(Inner, Label) end,
+    NotTrue = fun(Outer) -> ?BACKEND:if_block(Outer, {Reg, '!=', ?FALSE_ATOM}, JumpToLabel) end,
+    Checked = ?BACKEND:if_block(Loaded, {Reg, '!=', ?TRUE_ATOM}, NotTrue),
+    Freed = ?BACKEND:free_native_registers(Checked, [Reg]),
+    ?BACKEND:assert_all_native_free(Freed),
+    Labelled = ?BACKEND:add_label(Freed, Label, 16#1000),
+    Resolved = ?BACKEND:update_branches(Labelled),
+    Expected =
+        <<
+            " 0: 01852f83 lw t6,24(a0)\n"
+            " 4: 04b00f13 li t5,75\n"
+            " 8: 01ef8963 beq t6,t5,0x1a\n"
+            " c: 4f2d li t5,11\n"
+            " e: 01ef8663 beq t6,t5,0x1a\n"
+            " 12: 7ef0006f j 0x1000\n"
+            " 16: 00000013 nop"
+        >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Resolved)).
+
+%% Far variant where Label (0x1000) is registered before any code is emitted:
+%% the expected jump is an auipc + jr pair.
+is_boolean_far_known_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Label = 1,
+    Labelled = ?BACKEND:add_label(Fresh, Label, 16#1000),
+    {Loaded, Reg} = ?BACKEND:move_to_native_register(Labelled, {x_reg, 0}),
+    JumpToLabel = fun(Inner) -> ?BACKEND:jump_to_label(Inner, Label) end,
+    NotTrue = fun(Outer) -> ?BACKEND:if_block(Outer, {Reg, '!=', ?FALSE_ATOM}, JumpToLabel) end,
+    Checked = ?BACKEND:if_block(Loaded, {Reg, '!=', ?TRUE_ATOM}, NotTrue),
+    Freed = ?BACKEND:free_native_registers(Checked, [Reg]),
+    ?BACKEND:assert_all_native_free(Freed),
+    Resolved = ?BACKEND:update_branches(Freed),
+    Expected =
+        <<
+            " 0: 01852f83 lw t6,24(a0)\n"
+            " 4: 04b00f13 li t5,75\n"
+            " 8: 01ef8963 beq t6,t5,0x1a\n"
+            " c: 4f2d li t5,11\n"
+            " e: 01ef8663 beq t6,t5,0x1a\n"
+            " 12: 00001f17 auipc t5,0x1\n"
+            " 16: feef0067 jr -18(t5) # 0x1000"
+        >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Resolved)).
+
+%% Test OP_WAIT_TIMEOUT pattern that uses set_continuation_to_offset and continuation_entry_point
+wait_timeout_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Label = 42,
+
+    % Part 1: record the continuation, then tail-call the wait-timeout
+    % primitive with a 5000 timeout value and Label.
+    {ContSet, ContRef} = ?BACKEND:set_continuation_to_offset(Fresh),
+    {TimeoutLoaded, TimeoutReg} = ?BACKEND:move_to_native_register(ContSet, 5000),
+    Waiting = ?BACKEND:call_primitive_last(TimeoutLoaded, ?PRIM_WAIT_TIMEOUT, [
+        ctx, jit_state, {free, TimeoutReg}, Label
+    ]),
+
+    % Part 2: the continuation target (ContRef is bound to the current offset).
+    AtCont = ?BACKEND:add_label(Waiting, ContRef),
+    Entered = ?BACKEND:continuation_entry_point(AtCont),
+    {Signalled, SignalReg} = ?BACKEND:call_primitive(Entered, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
+        ctx, jit_state
+    ]),
+    Returned = ?BACKEND:return_if_not_equal_to_ctx(Signalled, {free, SignalReg}),
+    % ?WAITING_TIMEOUT_EXPIRED
+    {GotFlags, FlagsReg} = ?BACKEND:call_primitive(Returned, ?PRIM_CONTEXT_GET_FLAGS, [ctx, 2]),
+    TrapHandler = fun(Blk) ->
+        ?BACKEND:call_primitive_last(Blk, ?PRIM_WAIT_TIMEOUT_TRAP_HANDLER, [
+            ctx, jit_state, Label
+        ])
+    end,
+    Trapped = ?BACKEND:if_block(GotFlags, {{free, FlagsReg}, '==', 0}, TrapHandler),
+    Resolved = ?BACKEND:update_branches(Trapped),
+
+    Expected =
+        <<
+            " 0: 00000f97 auipc t6,0x0\n"
+            " 4: 0ff9 addi t6,t6,30 # 0x1e\n"
+            " 6: 0001 nop\n"
+            " 8: 01f5a223 sw t6,4(a1)\n"
+            " c: 6f85 lui t6,0x1\n"
+            " e: 388f8f93 addi t6,t6,904 # 0x1388\n"
+            " 12: 07862f03 lw t5,120(a2)\n"
+            " 16: 867e mv a2,t6\n"
+            " 18: 02a00693 li a3,42\n"
+            " 1c: 8f02 jr t5\n"
+            " 1e: 05462f83 lw t6,84(a2)\n"
+            " 22: 1141 addi sp,sp,-16\n"
+            " 24: c006 sw ra,0(sp)\n"
+            " 26: c22a sw a0,4(sp)\n"
+            " 28: c42e sw a1,8(sp)\n"
+            " 2a: c632 sw a2,12(sp)\n"
+            " 2c: 9f82 jalr t6\n"
+            " 2e: 8faa mv t6,a0\n"
+            " 30: 4082 lw ra,0(sp)\n"
+            " 32: 4512 lw a0,4(sp)\n"
+            " 34: 45a2 lw a1,8(sp)\n"
+            " 36: 4632 lw a2,12(sp)\n"
+            " 38: 0141 addi sp,sp,16\n"
+            " 3a: 00af8463 beq t6,a0,0x42\n"
+            " 3e: 857e mv a0,t6\n"
+            " 40: 8082 ret\n"
+            " 42: 08400f93 li t6,132\n"
+            " 46: 9fb2 add t6,t6,a2\n"
+            " 48: 000faf83 lw t6,0(t6)\n"
+            " 4c: 1141 addi sp,sp,-16\n"
+            " 4e: c006 sw ra,0(sp)\n"
+            " 50: c22a sw a0,4(sp)\n"
+            " 52: c42e sw a1,8(sp)\n"
+            " 54: c632 sw a2,12(sp)\n"
+            " 56: 4589 li a1,2\n"
+            " 58: 9f82 jalr t6\n"
+            " 5a: 8faa mv t6,a0\n"
+            " 5c: 4082 lw ra,0(sp)\n"
+            " 5e: 4512 lw a0,4(sp)\n"
+            " 60: 45a2 lw a1,8(sp)\n"
+            " 62: 4632 lw a2,12(sp)\n"
+            " 64: 0141 addi sp,sp,16\n"
+            " 66: 000f9763 bnez t6,0x74\n"
+            " 6a: 07c62f83 lw t6,124(a2)\n"
+            " 6e: 02a00613 li a2,42\n"
+            " 72: 8f82 jr t6"
+        >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Resolved)).
+
+%% Test OP_WAIT pattern that uses set_continuation_to_label
+%% update_branches/1 is not called and label 2 is never added, so the expected
+%% stream still carries 0xffffffff words ahead of the final sw/lw/jr.
+%% NOTE(review): the offset column of the dump is not contiguous for those
+%% 4-byte words (0, 4, 6, a, ...); presumably dump_to_bin/1 ignores that
+%% column -- confirm before relying on the printed offsets.
+wait_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Label = 2,
+    WithTable = ?BACKEND:jump_table(Fresh, 5),
+    AtLabel1 = ?BACKEND:add_label(WithTable, 1),
+    ContSet = ?BACKEND:set_continuation_to_label(AtLabel1, Label),
+    Final = ?BACKEND:call_primitive_last(ContSet, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]),
+    Expected =
+        <<
+            " 0: ffffffff .insn 4, 0xffffffff\n"
+            " 4: ffffffff .insn 4, 0xffffffff\n"
+            " 6: ffffffff .insn 4, 0xffffffff\n"
+            " a: ffffffff .insn 4, 0xffffffff\n"
+            " c: ffffffff .insn 4, 0xffffffff\n"
+            " 10: ffffffff .insn 4, 0xffffffff\n"
+            " 12: ffffffff .insn 4, 0xffffffff\n"
+            " 16: ffffffff .insn 4, 0xffffffff\n"
+            " 18: ffffffff .insn 4, 0xffffffff\n"
+            " 1c: ffffffff .insn 4, 0xffffffff\n"
+            " 1e: ffffffff .insn 4, 0xffffffff\n"
+            " 22: ffffffff .insn 4, 0xffffffff\n"
+            " 24: ffffffff .insn 4, 0xffffffff\n"
+            " 28: ffffffff .insn 4, 0xffffffff\n"
+            " 2c: 01f5a223 sw t6,4(a1)\n"
+            " 30: 07462f83 lw t6,116(a2)\n"
+            " 34: 8f82 jr t6"
+        >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Final)).
+
+%% Test return_labels_and_lines/2 function
+%% Registers two labels and two {Line, Offset} pairs, then checks the emitted
+%% stub (auipc + addi + ret) and the data that follows it.
+return_labels_and_lines_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    % Labels are added out of order (2 before 1); the expected data below
+    % appears to list label 1 first.
+    State1 = ?BACKEND:add_label(State0, 2, 32),
+    State2 = ?BACKEND:add_label(State1, 1, 16),
+
+    % {Line, Offset} pairs
+    SortedLines = [{10, 16}, {20, 32}],
+
+    State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines),
+    Stream = ?BACKEND:stream(State3),
+
+    % Byte accounting, read off the expected dump below:
+    %   code:    auipc (4) + compressed addi (2) + ret (2)   =  8 bytes
+    %   padding: 0xffff, so that the tables start at 0xa     =  2 bytes
+    %   tables:  labels (14) + lines (14)                    = 28 bytes
+    %   total                                                = 38 bytes
+    % Each table is presumably a 16-bit count followed by two 6-byte entries
+    % (16-bit label/line + 32-bit offset) -- confirm against the backend.
+    ?assertEqual(38, byte_size(Stream)),
+
+    % Expected: auipc a0, 0 + addi a0, a0, 10 + ret + padding + labels table + lines table
+    % Everything from offset 0x8 on is data; the mnemonics shown for those
+    % rows are only how a disassembler happens to decode the bytes.
+    Dump =
+        <<
+            " 0: 00000517 auipc a0,0x0\n"
+            " 4: 0529 addi a0,a0,10 # 0xa\n"
+            " 6: 8082 ret\n"
+            " 8: 0200ffff .insn 4, 0x0200ffff\n"
+            " c: 0100 addi s0,sp,128\n"
+            " e: 0000 unimp\n"
+            " 10: 1000 addi s0,sp,32\n"
+            " 12: 0200 addi s0,sp,256\n"
+            " 14: 0000 unimp\n"
+            " 16: 2000 fld fs0,0(s0)\n"
+            " 18: 0200 addi s0,sp,256\n"
+            " 1a: 0a00 addi s0,sp,272\n"
+            " 1c: 0000 unimp\n"
+            " 1e: 1000 addi s0,sp,32\n"
+            " 20: 1400 addi s0,sp,544\n"
+            " 22: 0000 unimp\n"
+            " 24: 2000 fld fs0,0(s0)"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test call_primitive with {free, {x_reg, X}}
+%% ?PRIM_GET_IMPORTED_BIF returns a function pointer, which is then called
+%% with a y register and a freed x register among its arguments; in the
+%% expected code both are loaded straight into argument registers (a3, a4).
+gc_bif2_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {GotPtr, FuncPtr} = ?BACKEND:call_primitive(Fresh, ?PRIM_GET_IMPORTED_BIF, [jit_state, 42]),
+    CallArgs = [ctx, 0, 3, {y_reg, 0}, {free, {x_reg, 0}}],
+    {Called, _ResultReg} = ?BACKEND:call_func_ptr(GotPtr, {free, FuncPtr}, CallArgs),
+    Expected =
+        <<
+            " 0: 02062f83 lw t6,32(a2)\n"
+            " 4: 1141 addi sp,sp,-16\n"
+            " 6: c006 sw ra,0(sp)\n"
+            " 8: c22a sw a0,4(sp)\n"
+            " a: c42e sw a1,8(sp)\n"
+            " c: c632 sw a2,12(sp)\n"
+            " e: 852e mv a0,a1\n"
+            " 10: 02a00593 li a1,42\n"
+            " 14: 9f82 jalr t6\n"
+            " 16: 8faa mv t6,a0\n"
+            " 18: 4082 lw ra,0(sp)\n"
+            " 1a: 4512 lw a0,4(sp)\n"
+            " 1c: 45a2 lw a1,8(sp)\n"
+            " 1e: 4632 lw a2,12(sp)\n"
+            " 20: 0141 addi sp,sp,16\n"
+            " 22: 1141 addi sp,sp,-16\n"
+            " 24: c006 sw ra,0(sp)\n"
+            " 26: c22a sw a0,4(sp)\n"
+            " 28: c42e sw a1,8(sp)\n"
+            " 2a: c632 sw a2,12(sp)\n"
+            " 2c: 4581 li a1,0\n"
+            " 2e: 460d li a2,3\n"
+            " 30: 01452f03 lw t5,20(a0)\n"
+            " 34: 000f2683 lw a3,0(t5)\n"
+            " 38: 4d18 lw a4,24(a0)\n"
+            " 3a: 9f82 jalr t6\n"
+            " 3c: 8faa mv t6,a0\n"
+            " 3e: 4082 lw ra,0(sp)\n"
+            " 40: 4512 lw a0,4(sp)\n"
+            " 42: 45a2 lw a1,8(sp)\n"
+            " 44: 4632 lw a2,12(sp)\n"
+            " 46: 0141 addi sp,sp,16"
+        >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Called)).

%% A primitive argument already lives in a1 ({free, a1}): the expected code
%% moves it through t5 into a2 before the call.
memory_ensure_free_with_roots_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    PrimArgs = [ctx, jit_state, {free, a1}, 4, 1],
    {St1, _FuncPtr} = ?BACKEND:call_primitive(
        St0, ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS, PrimArgs
    ),

    Stream = ?BACKEND:stream(St1),
    Expected = dump_to_bin(
        <<
            " 0: 0b000f93 li t6,176\n"
            " 4: 9fb2 add t6,t6,a2\n"
            " 6: 000faf83 lw t6,0(t6)\n"
            " a: 1141 addi sp,sp,-16\n"
            " c: c006 sw ra,0(sp)\n"
            " e: c22a sw a0,4(sp)\n"
            " 10: c42e sw a1,8(sp)\n"
            " 12: c632 sw a2,12(sp)\n"
            " 14: 8f2e mv t5,a1\n"
            " 16: 867a mv a2,t5\n"
            " 18: 4691 li a3,4\n"
            " 1a: 4705 li a4,1\n"
            " 1c: 9f82 jalr t6\n"
            " 1e: 8faa mv t6,a0\n"
            " 20: 4082 lw ra,0(sp)\n"
            " 22: 4512 lw a0,4(sp)\n"
            " 24: 45a2 lw a1,8(sp)\n"
            " 26: 4632 lw a2,12(sp)\n"
            " 28: 0141 addi sp,sp,16"
        >>
    ),
    ?assertEqual(Expected, Stream).

%% call_ext shape: reduction decrement followed by a primitive call with a
%% continuation pointer; no native register may remain allocated afterwards.
call_ext_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    St1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(St0),
    St2 = ?BACKEND:call_primitive_with_cp(St1, 4, [ctx, jit_state, 2, 5, -1]),
    ?BACKEND:assert_all_native_free(St2),
    Stream = ?BACKEND:stream(St2),
    Expected = dump_to_bin(
        <<
            " 0: 0085af83 lw t6,8(a1)\n"
            " 4: 1ffd addi t6,t6,-1\n"
            " 6: 01f5a423 sw t6,8(a1)\n"
            " a: 000f9b63 bnez t6,0x20\n"
            " e: 00000f97 auipc t6,0x0\n"
            " 12: 0fc9 addi t6,t6,18 # 0x20\n"
            " 14: 0001 nop\n"
            " 16: 01f5a223 sw t6,4(a1)\n"
            " 1a: 00862f83 lw t6,8(a2)\n"
            " 1e: 8f82 jr t6\n"
            " 20: 0005af03 lw t5,0(a1)\n"
            " 24: 000f2f03 lw t5,0(t5)\n"
            " 28: 0f62 slli t5,t5,0x18\n"
            " 2a: 11800f93 li t6,280\n"
            " 2e: 00000013 nop\n"
            " 32: 01ff6f33 or t5,t5,t6\n"
            " 36: 05e52e23 sw t5,92(a0)\n"
            " 3a: 01062f83 lw t6,16(a2)\n"
            " 3e: 4609 li a2,2\n"
            " 40: 4695 li a3,5\n"
            " 42: 577d li a4,-1\n"
            " 44: 8f82 jr t6"
        >>
    ),
    ?assertEqual(Expected, Stream).

%% call_fun shape: check that x[0] is a boxed fun (raising badfun otherwise),
%% then tail into PRIM_CALL_FUN with a continuation pointer.
call_fun_test() ->
    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    S1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(S0),
    FunSrc = {x_reg, 0},
    Arity = 0,
    {S2, FunReg} = ?BACKEND:move_to_native_register(S1, FunSrc),
    {S3, Scratch} = ?BACKEND:copy_to_native_register(S2, FunReg),
    % Helper shared by both checks: raise {badfun, Scratch}.
    RaiseBadFun = fun(Inner) ->
        ?BACKEND:call_primitive_last(Inner, ?PRIM_RAISE_ERROR_TUPLE, [
            ctx, jit_state, offset, ?BADFUN_ATOM, Scratch
        ])
    end,
    % First check: the term must be a boxed pointer.
    S4 = ?BACKEND:if_block(
        S3, {Scratch, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, RaiseBadFun
    ),
    % The match on Scratch asserts and_/3 reuses the same register.
    {S5, Scratch} = ?BACKEND:and_(S4, {free, Scratch}, ?TERM_PRIMARY_CLEAR_MASK),
    S6 = ?BACKEND:move_array_element(S5, Scratch, 0, Scratch),
    % Second check: the boxed header must be tagged as a fun.
    S7 = ?BACKEND:if_block(
        S6, {Scratch, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, RaiseBadFun
    ),
    S8 = ?BACKEND:free_native_registers(S7, [Scratch]),
    S9 = ?BACKEND:call_primitive_with_cp(S8, ?PRIM_CALL_FUN, [
        ctx, jit_state, FunReg, Arity
    ]),
    ?BACKEND:assert_all_native_free(S9),
    Stream = ?BACKEND:stream(S9),
    Expected = dump_to_bin(
        <<
            " 0: 0085af83 lw t6,8(a1)\n"
            " 4: 1ffd addi t6,t6,-1\n"
            " 6: 01f5a423 sw t6,8(a1)\n"
            " a: 000f9b63 bnez t6,0x20\n"
            " e: 00000f97 auipc t6,0x0\n"
            " 12: 0fc9 addi t6,t6,18 # 0x20\n"
            " 14: 0001 nop\n"
            " 16: 01f5a223 sw t6,4(a1)\n"
            " 1a: 00862f83 lw t6,8(a2)\n"
            " 1e: 8f82 jr t6\n"
            " 20: 01852f83 lw t6,24(a0)\n"
            " 24: 8f7e mv t5,t6\n"
            " 26: 8efa mv t4,t5\n"
            " 28: 4e0d li t3,3\n"
            " 2a: 01cefeb3 and t4,t4,t3\n"
            " 2e: 4e09 li t3,2\n"
            " 30: 01ce8a63 beq t4,t3,0x44\n"
            " 34: 04c62f83 lw t6,76(a2)\n"
            " 38: 03800613 li a2,56\n"
            " 3c: 18b00693 li a3,395\n"
            " 40: 877a mv a4,t5\n"
            " 42: 8f82 jr t6\n"
            " 44: 4e8d li t4,3\n"
            " 46: fffece93 not t4,t4\n"
            " 4a: 01df7f33 and t5,t5,t4\n"
            " 4e: 000f2f03 lw t5,0(t5)\n"
            " 52: 8efa mv t4,t5\n"
            " 54: 03f00e13 li t3,63\n"
            " 58: 01cefeb3 and t4,t4,t3\n"
            " 5c: 4e51 li t3,20\n"
            " 5e: 01ce8a63 beq t4,t3,0x72\n"
            " 62: 04c62f83 lw t6,76(a2)\n"
            " 66: 06600613 li a2,102\n"
            " 6a: 18b00693 li a3,395\n"
            " 6e: 877a mv a4,t5\n"
            " 70: 8f82 jr t6\n"
            " 72: 0005ae83 lw t4,0(a1)\n"
            " 76: 000eae83 lw t4,0(t4)\n"
            " 7a: 0ee2 slli t4,t4,0x18\n"
            " 7c: 27000f13 li t5,624\n"
            " 80: 00000013 nop\n"
            " 84: 01eeeeb3 or t4,t4,t5\n"
            " 88: 05d52e23 sw t4,92(a0)\n"
            " 8c: 08000f13 li t5,128\n"
            " 90: 9f32 add t5,t5,a2\n"
            " 92: 000f2f03 lw t5,0(t5)\n"
            " 96: 867e mv a2,t6\n"
            " 98: 4681 li a3,0\n"
            " 9a: 8f02 jr t5"
        >>
    ),
    ?assertEqual(Expected, Stream).

%% Helper: emit move_to_vm_register(Source -> Dest) followed by a jump to
%% offset 0x100, and compare the stream with the expected dump.
move_to_vm_register_test0(State, Source, Dest, Dump) ->
    Moved = ?BACKEND:move_to_vm_register(State, Source, Dest),
    WithJump = ?BACKEND:jump_to_offset(Moved, 16#100),
    ?assertEqual(dump_to_bin(Dump), ?BACKEND:stream(WithJump)).

%% Generator covering the source/destination combinations of
%% move_to_vm_register/3. Every case starts from the same fresh state.
move_to_vm_register_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(Base) ->
            [
                %% Zero immediate to x_reg, x_reg extra, ptr and y_reg
                ?_test(
                    move_to_vm_register_test0(Base, 0, {x_reg, 0}, <<
                        " 0: 4f81 li t6,0\n"
                        " 2: 01f52c23 sw t6,24(a0)\n"
                        " 6: a8ed j 0x100"
                    >>)
                ),
                ?_test(
                    move_to_vm_register_test0(Base, 0, {x_reg, extra}, <<
                        " 0: 4f81 li t6,0\n"
                        " 2: 05f52c23 sw t6,88(a0)\n"
                        " 6: a8ed j 0x100"
                    >>)
                ),
                ?_test(
                    move_to_vm_register_test0(Base, 0, {ptr, t5}, <<
                        " 0: 4f81 li t6,0\n"
                        " 2: 01ff2023 sw t6,0(t5)\n"
                        " 6: a8ed j 0x100"
                    >>)
                ),
                ?_test(
                    move_to_vm_register_test0(Base, 0, {y_reg, 2}, <<
                        " 0: 4f01 li t5,0\n"
                        " 2: 01452f83 lw t6,20(a0)\n"
                        " 6: 01efa423 sw t5,8(t6)\n"
                        " a: a8dd j 0x100"
                    >>)
                ),
                ?_test(
                    move_to_vm_register_test0(Base, 0, {y_reg, 20}, <<
                        " 0: 4f01 li t5,0\n"
                        " 2: 01452f83 lw t6,20(a0)\n"
                        " 6: 05efa823 sw t5,80(t6)\n"
                        " a: a8dd j 0x100"
                    >>)
                ),
                %% Small immediate to x_reg and y_reg
                ?_test(
                    move_to_vm_register_test0(Base, 42, {x_reg, 0}, <<
                        " 0: 02a00f93 li t6,42\n"
                        " 4: 01f52c23 sw t6,24(a0)\n"
                        " 8: a8e5 j 0x100"
                    >>)
                ),
                ?_test(
                    move_to_vm_register_test0(Base, 42, {x_reg, extra}, <<
                        " 0: 02a00f93 li t6,42\n"
                        " 4: 05f52c23 sw t6,88(a0)\n"
                        " 8: a8e5 j 0x100"
                    >>)
                ),
                ?_test(
                    move_to_vm_register_test0(Base, 42, {y_reg, 2}, <<
                        " 0: 02a00f13 li t5,42\n"
                        " 4: 01452f83 lw t6,20(a0)\n"
                        " 8: 01efa423 sw t5,8(t6)\n"
                        " c: a8d5 j 0x100"
                    >>)
                ),
                ?_test(
                    move_to_vm_register_test0(Base, 42, {y_reg, 20}, <<
                        " 0: 02a00f13 li t5,42\n"
                        " 4: 01452f83 lw t6,20(a0)\n"
                        " 8: 05efa823 sw t5,80(t6)\n"
                        " c: a8d5 j 0x100"
                    >>)
                ),
                %% Immediate to ptr
                ?_test(
                    move_to_vm_register_test0(Base, 99, {ptr, a3}, <<
                        " 0: 06300f93 li t6,99\n"
                        " 4: 01f6a023 sw t6,0(a3)\n"
                        " 8: a8e5 j 0x100"
                    >>)
                ),
                %% x_reg to x_reg
                ?_test(
                    move_to_vm_register_test0(Base, {x_reg, 1}, {x_reg, 2}, <<
                        " 0: 01c52f83 lw t6,28(a0)\n"
                        " 4: 03f52023 sw t6,32(a0)\n"
                        " 8: a8e5 j 0x100"
                    >>)
                ),
                %% x_reg to ptr
                ?_test(
                    move_to_vm_register_test0(Base, {x_reg, 1}, {ptr, a1}, <<
                        " 0: 01c52f83 lw t6,28(a0)\n"
                        " 4: 01f5a023 sw t6,0(a1)\n"
                        " 8: a8e5 j 0x100"
                    >>)
                ),
                %% ptr to x_reg
                ?_test(
                    move_to_vm_register_test0(Base, {ptr, t3}, {x_reg, 3}, <<
                        " 0: 000e2f83 lw t6,0(t3)\n"
                        " 4: 03f52223 sw t6,36(a0)\n"
                        " 8: a8e5 j 0x100"
                    >>)
                ),
                %% x_reg to y_reg
                ?_test(
                    move_to_vm_register_test0(Base, {x_reg, 0}, {y_reg, 1}, <<
                        " 0: 01852f83 lw t6,24(a0)\n"
                        " 4: 01452f03 lw t5,20(a0)\n"
                        " 8: 01ff2223 sw t6,4(t5)\n"
                        " c: a8d5 j 0x100"
                    >>)
                ),
                %% y_reg to x_reg
                ?_test(
                    move_to_vm_register_test0(Base, {y_reg, 0}, {x_reg, 3}, <<
                        " 0: 01452f03 lw t5,20(a0)\n"
                        " 4: 000f2f83 lw t6,0(t5)\n"
                        " 8: 03f52223 sw t6,36(a0)\n"
                        " c: a8d5 j 0x100"
                    >>)
                ),
                %% y_reg with a non-zero index to x_reg (destination is an
                %% x register, not a y register)
                ?_test(
                    move_to_vm_register_test0(Base, {y_reg, 1}, {x_reg, 3}, <<
                        " 0: 01452f03 lw t5,20(a0)\n"
                        " 4: 004f2f83 lw t6,4(t5)\n"
                        " 8: 03f52223 sw t6,36(a0)\n"
                        " c: a8d5 j 0x100"
                    >>)
                ),
                %% Native register to x_reg
                ?_test(
                    move_to_vm_register_test0(Base, t4, {x_reg, 0}, <<
                        " 0: 01d52c23 sw t4,24(a0)\n"
                        " 4: a8f5 j 0x100"
                    >>)
                ),
                ?_test(
                    move_to_vm_register_test0(Base, t5, {x_reg, extra}, <<
                        " 0: 05e52c23 sw t5,88(a0)\n"
                        " 4: a8f5 j 0x100"
                    >>)
                ),
                %% Native register to ptr
                ?_test(
                    move_to_vm_register_test0(Base, t3, {ptr, a3}, <<
                        " 0: 01c6a023 sw t3,0(a3)\n"
                        " 4: a8f5 j 0x100"
                    >>)
                ),
                %% Native register to y_reg
                ?_test(
                    move_to_vm_register_test0(Base, a1, {y_reg, 0}, <<
                        " 0: 01452f83 lw t6,20(a0)\n"
                        " 4: 00bfa023 sw a1,0(t6)\n"
                        " 8: a8e5 j 0x100"
                    >>)
                ),
                %% Large immediate (materialised with lui + addi) to x_reg
                ?_test(
                    move_to_vm_register_test0(Base, 16#12345678, {x_reg, 0}, <<
                        " 0: 12345fb7 lui t6,0x12345\n"
                        " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n"
                        " 8: 01f52c23 sw t6,24(a0)\n"
                        " c: a8d5 j 0x100"
                    >>)
                ),
                ?_test(
                    move_to_vm_register_test0(Base, 16#12345678, {x_reg, extra}, <<
                        " 0: 12345fb7 lui t6,0x12345\n"
                        " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n"
                        " 8: 05f52c23 sw t6,88(a0)\n"
                        " c: a8d5 j 0x100"
                    >>)
                ),
                %% Large immediate to y_reg
                ?_test(
                    move_to_vm_register_test0(Base, 16#12345678, {y_reg, 2}, <<
                        " 0: 12345fb7 lui t6,0x12345\n"
                        " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n"
                        " 8: 01452f03 lw t5,20(a0)\n"
                        " c: 01ff2423 sw t6,8(t5)\n"
                        " 10: a8c5 j 0x100"
                    >>)
                ),
                ?_test(
                    move_to_vm_register_test0(Base, 16#12345678, {y_reg, 20}, <<
                        " 0: 12345fb7 lui t6,0x12345\n"
                        " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n"
                        " 8: 01452f03 lw t5,20(a0)\n"
                        " c: 05ff2823 sw t6,80(t5)\n"
                        " 10: a8c5 j 0x100"
                    >>)
                ),
                %% Large immediate to ptr
                ?_test(
                    move_to_vm_register_test0(Base, 16#12345678, {ptr, a3}, <<
                        " 0: 12345fb7 lui t6,0x12345\n"
                        " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n"
                        " 8: 01f6a023 sw t6,0(a3)\n"
                        " c: a8d5 j 0x100"
                    >>)
                ),
                %% x_reg to y_reg (high index)
                ?_test(
                    move_to_vm_register_test0(Base, {x_reg, 15}, {y_reg, 31}, <<
                        " 0: 05452f83 lw t6,84(a0)\n"
                        " 4: 01452f03 lw t5,20(a0)\n"
                        " 8: 07ff2e23 sw t6,124(t5)\n"
                        " c: a8d5 j 0x100"
                    >>)
                ),
                %% y_reg to x_reg (high index)
                ?_test(
                    move_to_vm_register_test0(Base, {y_reg, 31}, {x_reg, 15}, <<
                        " 0: 01452f03 lw t5,20(a0)\n"
                        " 4: 07cf2f83 lw t6,124(t5)\n"
                        " 8: 05f52a23 sw t6,84(a0)\n"
                        " c: a8d5 j 0x100"
                    >>)
                ),
                %% y_reg index 32: the expected code computes the slot address
                %% in a temporary (li + add) instead of using a store offset
                ?_test(
                    move_to_vm_register_test0(Base, 42, {y_reg, 32}, <<
                        " 0: 02a00f13 li t5,42\n"
                        " 4: 01452f83 lw t6,20(a0)\n"
                        " 8: 08000e93 li t4,128\n"
                        " c: 9efe add t4,t4,t6\n"
                        " e: 01eea023 sw t5,0(t4)\n"
                        " 12: a0fd j 0x100"
                    >>)
                ),
                %% Negative immediates to x_reg
                ?_test(
                    move_to_vm_register_test0(Base, -1, {x_reg, 0}, <<
                        " 0: 5ffd li t6,-1\n"
                        " 2: 01f52c23 sw t6,24(a0)\n"
                        " 6: a8ed j 0x100"
                    >>)
                ),
                ?_test(
                    move_to_vm_register_test0(Base, -100, {x_reg, 0}, <<
                        " 0: f9c00f93 li t6,-100\n"
                        " 4: 01f52c23 sw t6,24(a0)\n"
                        " 8: a8e5 j 0x100"
                    >>)
                ),
                ?_test(
                    move_to_vm_register_test0(Base, -1000, {x_reg, 0}, <<
                        " 0: c1800f93 li t6,-1000\n"
                        " 4: 01f52c23 sw t6,24(a0)\n"
                        " 8: a8e5 j 0x100"
                    >>)
                )
            ]
        end}.

%% Helper: emit move_array_element(Reg[Index] -> Dest) and compare the
%% resulting stream with the expected dump.
move_array_element_test0(State, Reg, Index, Dest, Dump) ->
    Moved = ?BACKEND:move_array_element(State, Reg, Index, Dest),
    ?assertEqual(dump_to_bin(Dump), ?BACKEND:stream(Moved)).

%% Generator for move_array_element/4 with constant and register indexes.
move_array_element_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(Base) ->
            [
                %% reg[N] to x_reg
                ?_test(
                    move_array_element_test0(Base, a3, 2, {x_reg, 0}, <<
                        " 0: 0086af83 lw t6,8(a3)\n"
                        " 4: 01f52c23 sw t6,24(a0)"
                    >>)
                ),
                %% reg[N] to ptr
                ?_test(
                    move_array_element_test0(Base, a3, 3, {ptr, t4}, <<
                        " 0: 00c6af83 lw t6,12(a3)\n"
                        " 4: 01fea023 sw t6,0(t4)"
                    >>)
                ),
                %% reg[N] to y_reg
                ?_test(
                    move_array_element_test0(Base, a3, 1, {y_reg, 2}, <<
                        " 0: 0046af03 lw t5,4(a3)\n"
                        " 4: 01452f83 lw t6,20(a0)\n"
                        " 8: 01efa423 sw t5,8(t6)"
                    >>)
                ),
                %% reg[N] to a native register (t4)
                ?_test(
                    move_array_element_test0(Base, a3, 1, t4, <<
                        " 0: 0046ae83 lw t4,4(a3)"
                    >>)
                ),
                %% reg[N] to y_reg (high index)
                ?_test(
                    move_array_element_test0(Base, a3, 7, {y_reg, 31}, <<
                        " 0: 01c6af03 lw t5,28(a3)\n"
                        " 4: 01452f83 lw t6,20(a0)\n"
                        " 8: 07efae23 sw t5,124(t6)"
                    >>)
                ),
                %% reg[N] to x_reg (high index)
                ?_test(
                    move_array_element_test0(Base, a3, 7, {x_reg, 15}, <<
                        " 0: 01c6af83 lw t6,28(a3)\n"
                        " 4: 05f52a23 sw t6,84(a0)"
                    >>)
                ),
                %% reg[IndexReg] to x_reg; the index register is freed
                ?_test(begin
                    {Loaded, IndexReg} = ?BACKEND:get_array_element(Base, a3, 4),
                    move_array_element_test0(Loaded, a3, {free, IndexReg}, {x_reg, 2}, <<
                        " 0: 0106af83 lw t6,16(a3)\n"
                        " 4: 0f8a slli t6,t6,0x2\n"
                        " 6: 01f68fb3 add t6,a3,t6\n"
                        " a: 000faf83 lw t6,0(t6)\n"
                        " e: 03f52023 sw t6,32(a0)"
                    >>)
                end),
                %% reg[IndexReg] to ptr
                ?_test(begin
                    {Loaded, IndexReg} = ?BACKEND:get_array_element(Base, a3, 4),
                    move_array_element_test0(Loaded, a3, {free, IndexReg}, {ptr, t4}, <<
                        " 0: 0106af83 lw t6,16(a3)\n"
                        " 4: 0f8a slli t6,t6,0x2\n"
                        " 6: 01f68fb3 add t6,a3,t6\n"
                        " a: 000faf83 lw t6,0(t6)\n"
                        " e: 01fea023 sw t6,0(t4)"
                    >>)
                end),
                %% reg[IndexReg] to y_reg
                ?_test(begin
                    {Loaded, IndexReg} = ?BACKEND:get_array_element(Base, a3, 4),
                    move_array_element_test0(Loaded, a3, {free, IndexReg}, {y_reg, 31}, <<
                        " 0: 0106af83 lw t6,16(a3)\n"
                        " 4: 0f8a slli t6,t6,0x2\n"
                        " 6: 01f68fb3 add t6,a3,t6\n"
                        " a: 000faf83 lw t6,0(t6)\n"
                        " e: 01452f03 lw t5,20(a0)\n"
                        " 12: 07ff2e23 sw t6,124(t5)"
                    >>)
                end),
                %% Integer index with the base held in an allocated register
                ?_test(begin
                    {Loaded, BaseReg} = ?BACKEND:move_to_native_register(Base, {x_reg, 0}),
                    move_array_element_test0(Loaded, BaseReg, 2, {x_reg, 5}, <<
                        " 0: 01852f83 lw t6,24(a0)\n"
                        " 4: 008faf03 lw t5,8(t6)\n"
                        " 8: 03e52623 sw t5,44(a0)"
                    >>)
                end)
            ]
        end}.

%% Generator for get_array_element/3.
get_array_element_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(Base) ->
            [
                %% reg[N] loaded into a newly allocated native register
                ?_test(begin
                    {Loaded, Reg} = ?BACKEND:get_array_element(Base, t3, 4),
                    Stream = ?BACKEND:stream(Loaded),
                    Expected = dump_to_bin(<<
                        " 0: 010e2f83 lw t6,16(t3)"
                    >>),
                    ?assertEqual(Expected, Stream),
                    ?assertEqual(t6, Reg)
                end)
            ]
        end}.

%% Generator for move_to_array_element/4 and /5.
move_to_array_element_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(Base) ->
            [
                %% /4: x_reg to reg[N]
                ?_test(begin
                    Done = ?BACKEND:move_to_array_element(Base, {x_reg, 0}, a3, 2),
                    Expected = dump_to_bin(<<
                        " 0: 01852f83 lw t6,24(a0)\n"
                        " 4: 01f6a423 sw t6,8(a3)"
                    >>),
                    ?assertEqual(Expected, ?BACKEND:stream(Done))
                end),
                %% /4: x_reg to reg[reg]
                ?_test(begin
                    Done = ?BACKEND:move_to_array_element(Base, {x_reg, 0}, a3, t3),
                    Expected = dump_to_bin(<<
                        " 0: 01852f83 lw t6,24(a0)\n"
                        " 4: 8f72 mv t5,t3\n"
                        " 6: 0f0a slli t5,t5,0x2\n"
                        " 8: 01e68f33 add t5,a3,t5\n"
                        " c: 01ff2023 sw t6,0(t5)"
                    >>),
                    ?assertEqual(Expected, ?BACKEND:stream(Done))
                end),
                %% /4: ptr to reg[reg]
                ?_test(begin
                    Done = ?BACKEND:move_to_array_element(Base, {ptr, t6}, a3, t3),
                    Expected = dump_to_bin(<<
                        " 0: 000faf83 lw t6,0(t6)\n"
                        " 4: 8f72 mv t5,t3\n"
                        " 6: 0f0a slli t5,t5,0x2\n"
                        " 8: 01e68f33 add t5,a3,t5\n"
                        " c: 01ff2023 sw t6,0(t5)"
                    >>),
                    ?assertEqual(Expected, ?BACKEND:stream(Done))
                end),
                %% /4: y_reg to reg[reg]
                ?_test(begin
                    Done = ?BACKEND:move_to_array_element(Base, {y_reg, 2}, a3, t3),
                    Expected = dump_to_bin(<<
                        " 0: 01452f03 lw t5,20(a0)\n"
                        " 4: 008f2f83 lw t6,8(t5)\n"
                        " 8: 8f72 mv t5,t3\n"
                        " a: 0f0a slli t5,t5,0x2\n"
                        " c: 01e68f33 add t5,a3,t5\n"
                        " 10: 01ff2023 sw t6,0(t5)"
                    >>),
                    ?assertEqual(Expected, ?BACKEND:stream(Done))
                end),
                %% /5: x_reg to reg[N+offset], integer index
                ?_test(begin
                    Done = ?BACKEND:move_to_array_element(Base, {x_reg, 0}, a3, 2, 1),
                    Expected = dump_to_bin(<<
                        " 0: 01852f83 lw t6,24(a0)\n"
                        " 4: 01f6a423 sw t6,8(a3)"
                    >>),
                    ?assertEqual(Expected, ?BACKEND:stream(Done))
                end),
                %% /5: x_reg to reg[reg+offset]. The state tuple is patched
                %% directly (elements 6 and 7) so that a3 and t3 count as
                %% used; the used_regs/1 match below confirms it.
                ?_test(begin
                    Avail = ?BACKEND:available_regs(Base) -- [a3, t3],
                    Patched0 = setelement(6, Base, Avail),
                    Patched = setelement(7, Patched0, [a3, t3]),
                    [a3, t3] = ?BACKEND:used_regs(Patched),
                    Done = ?BACKEND:move_to_array_element(Patched, {x_reg, 0}, a3, t3, 1),
                    Expected = dump_to_bin(<<
                        " 0: 01852f83 lw t6,24(a0)\n"
                        " 4: 001e0f13 addi t5,t3,1\n"
                        " 8: 0f0a slli t5,t5,0x2\n"
                        " a: 01e68f33 add t5,a3,t5\n"
                        " e: 01ff2023 sw t6,0(t5)"
                    >>),
                    ?assertEqual(Expected, ?BACKEND:stream(Done))
                end),
                %% /5: immediate to reg[reg+offset], same patched state
                ?_test(begin
                    Avail = ?BACKEND:available_regs(Base) -- [a3, t3],
                    Patched0 = setelement(6, Base, Avail),
                    Patched = setelement(7, Patched0, [a3, t3]),
                    [a3, t3] = ?BACKEND:used_regs(Patched),
                    Done = ?BACKEND:move_to_array_element(Patched, 42, a3, t3, 1),
                    Expected = dump_to_bin(<<
                        " 0: 02a00f93 li t6,42\n"
                        " 4: 001e0f13 addi t5,t3,1\n"
                        " 8: 0f0a slli t5,t5,0x2\n"
                        " a: 01e68f33 add t5,a3,t5\n"
                        " e: 01ff2023 sw t6,0(t5)"
                    >>),
                    ?assertEqual(Expected, ?BACKEND:stream(Done))
                end)
            ]
        end}.

%% Generator for move_to_native_register/2 (register chosen by the backend)
%% and move_to_native_register/3 (register chosen by the caller).
move_to_native_register_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(Base) ->
            [
                %% /2: small positive immediate
                ?_test(begin
                    {Done, Reg} = ?BACKEND:move_to_native_register(Base, 42),
                    Stream = ?BACKEND:stream(Done),
                    ?assertEqual(t6, Reg),
                    Expected = dump_to_bin(<<
                        " 0: 02a00f93 li t6,42"
                    >>),
                    ?assertEqual(Expected, Stream)
                end),
                %% /2: negative immediate
                ?_test(begin
                    {Done, Reg} = ?BACKEND:move_to_native_register(Base, -42),
                    Stream = ?BACKEND:stream(Done),
                    ?assertEqual(t6, Reg),
                    Expected = dump_to_bin(<<
                        " 0: fd600f93 li t6,-42"
                    >>),
                    ?assertEqual(Expected, Stream)
                end),
                %% /2: -255 (boundary case)
                ?_test(begin
                    {Done, Reg} = ?BACKEND:move_to_native_register(Base, -255),
                    Stream = ?BACKEND:stream(Done),
                    ?assertEqual(t6, Reg),
                    Expected = dump_to_bin(<<
                        " 0: f0100f93 li t6,-255"
                    >>),
                    ?assertEqual(Expected, Stream)
                end),
                %% /2: -256 (boundary case; still a single li here), followed
                %% by a jump to offset 0x100
                ?_test(begin
                    {Loaded, Reg} = ?BACKEND:move_to_native_register(Base, -256),
                    Done = ?BACKEND:jump_to_offset(Loaded, 16#100),
                    Stream = ?BACKEND:stream(Done),
                    ?assertEqual(t6, Reg),
                    Expected = dump_to_bin(<<
                        " 0: f0000f93 li t6,-256\n"
                        " 4: a8f5 j 0x100"
                    >>),
                    ?assertEqual(Expected, Stream)
                end),
                %% /2: {ptr, Reg} is dereferenced in place
                ?_test(begin
                    {Done, Reg} = ?BACKEND:move_to_native_register(Base, {ptr, t5}),
                    Stream = ?BACKEND:stream(Done),
                    ?assertEqual(t5, Reg),
                    Expected = dump_to_bin(<<
                        " 0: 000f2f03 lw t5,0(t5)"
                    >>),
                    ?assertEqual(Expected, Stream)
                end),
                %% /2: {x_reg, N}
                ?_test(begin
                    {Done, Reg} = ?BACKEND:move_to_native_register(Base, {x_reg, 5}),
                    Stream = ?BACKEND:stream(Done),
                    ?assertEqual(t6, Reg),
                    Expected = dump_to_bin(<<
                        " 0: 02c52f83 lw t6,44(a0)"
                    >>),
                    ?assertEqual(Expected, Stream)
                end),
                %% /2: {y_reg, N}
                ?_test(begin
                    {Done, Reg} = ?BACKEND:move_to_native_register(Base, {y_reg, 3}),
                    Stream = ?BACKEND:stream(Done),
                    ?assertEqual(t6, Reg),
                    Expected = dump_to_bin(<<
                        " 0: 01452f03 lw t5,20(a0)\n"
                        " 4: 00cf2f83 lw t6,12(t5)"
                    >>),
                    ?assertEqual(Expected, Stream)
                end),
                %% /3: immediate into a given register
                ?_test(begin
                    Done = ?BACKEND:move_to_native_register(Base, 42, t5),
                    Expected = dump_to_bin(<<
                        " 0: 02a00f13 li t5,42"
                    >>),
                    ?assertEqual(Expected, ?BACKEND:stream(Done))
                end),
                %% /3: register to register
                ?_test(begin
                    Done = ?BACKEND:move_to_native_register(Base, t6, t4),
                    Expected = dump_to_bin(<<
                        " 0: 8efe mv t4,t6"
                    >>),
                    ?assertEqual(Expected, ?BACKEND:stream(Done))
                end),
                %% /3: {ptr, Reg} into a given register
                ?_test(begin
                    Done = ?BACKEND:move_to_native_register(Base, {ptr, t6}, t3),
                    Expected = dump_to_bin(<<
                        " 0: 000fae03 lw t3,0(t6)"
                    >>),
                    ?assertEqual(Expected, ?BACKEND:stream(Done))
                end),
                %% /3: {x_reg, N} into a given register
                ?_test(begin
                    Done = ?BACKEND:move_to_native_register(Base, {x_reg, 2}, a3),
                    Expected = dump_to_bin(<<
                        " 0: 5114 lw a3,32(a0)"
                    >>),
                    ?assertEqual(Expected, ?BACKEND:stream(Done))
                end),
                %% /3: {y_reg, N} into a given register
                ?_test(begin
                    Done = ?BACKEND:move_to_native_register(Base, {y_reg, 2}, a1),
                    Expected = dump_to_bin(<<
                        " 0: 01452f83 lw t6,20(a0)\n"
                        " 4: 008fa583 lw a1,8(t6)"
                    >>),
                    ?assertEqual(Expected, ?BACKEND:stream(Done))
                end),
                %% move_to_vm_register: ptr with offset to fp_reg
                %% (term_to_float); copied as two 32-bit words
                ?_test(begin
                    {Loaded, RegA} = ?BACKEND:move_to_native_register(Base, {x_reg, 0}),
                    Done = ?BACKEND:move_to_vm_register(
                        Loaded, {free, {ptr, RegA, 1}}, {fp_reg, 3}
                    ),
                    Expected = dump_to_bin(<<
                        " 0: 01852f83 lw t6,24(a0)\n"
                        " 4: 06052f03 lw t5,96(a0)\n"
                        " 8: 004fae83 lw t4,4(t6)\n"
                        " c: 01df2c23 sw t4,24(t5)\n"
                        " 10: 008fae83 lw t4,8(t6)\n"
                        " 14: 01df2e23 sw t4,28(t5)"
                    >>),
                    ?assertEqual(Expected, ?BACKEND:stream(Done))
                end)
            ]
        end}.

%% Helper: emit add(Reg, Imm) followed by a jump to offset 0x100 and compare
%% the stream with the expected dump.
add_test0(State0, Reg, Imm, Dump) ->
    Added = ?BACKEND:add(State0, Reg, Imm),
    % Every expected dump ends with this jump.
    WithJump = ?BACKEND:jump_to_offset(Added, 16#100),
    ?assertEqual(dump_to_bin(Dump), ?BACKEND:stream(WithJump)).

%% Generator for add/3: small immediate, larger immediate, register operand.
add_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(Base) ->
            [
                ?_test(
                    add_test0(Base, a2, 2, <<
                        " 0: 0609 addi a2,a2,2\n"
                        " 2: a8fd j 0x100"
                    >>)
                ),
                ?_test(
                    add_test0(Base, a2, 256, <<
                        " 0: 10000f93 li t6,256\n"
                        " 4: 967e add a2,a2,t6\n"
                        " 6: a8ed j 0x100"
                    >>)
                ),
                ?_test(
                    add_test0(Base, a2, a3, <<
                        " 0: 9636 add a2,a2,a3\n"
                        " 2: a8fd j 0x100"
                    >>)
                )
            ]
        end}.

%% Helper: emit sub(Reg, Imm) followed by a jump to offset 0x100 and compare
%% the stream with the expected dump.
sub_test0(State0, Reg, Imm, Dump) ->
    Subtracted = ?BACKEND:sub(State0, Reg, Imm),
    % Every expected dump ends with this jump.
    WithJump = ?BACKEND:jump_to_offset(Subtracted, 16#100),
    ?assertEqual(dump_to_bin(Dump), ?BACKEND:stream(WithJump)).

%% Generator for sub/3: small immediate, larger immediate, register operand.
sub_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(Base) ->
            [
                ?_test(
                    sub_test0(Base, a2, 2, <<
                        " 0: 1679 addi a2,a2,-2\n"
                        " 2: a8fd j 0x100"
                    >>)
                ),
                ?_test(
                    sub_test0(Base, a2, 256, <<
                        " 0: 10000f93 li t6,256\n"
                        " 4: 41f60633 sub a2,a2,t6\n"
                        " 8: a8e5 j 0x100"
                    >>)
                ),
                ?_test(
                    sub_test0(Base, a2, a3, <<
                        " 0: 8e15 sub a2,a2,a3\n"
                        " 2: a8fd j 0x100"
                    >>)
                )
            ]
        end}.

%% Helper: emit mul(Reg, Imm) and compare the stream with the expected dump.
mul_test0(State0, Reg, Imm, Dump) ->
    Multiplied = ?BACKEND:mul(State0, Reg, Imm),
    ?assertEqual(dump_to_bin(Dump), ?BACKEND:stream(Multiplied)).

%% Generator for mul/3 with constant multipliers 2..11. Per the expected
%% dumps, 2..10 are expanded to shifts with add/sub and 11 uses a real mul.
mul_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(Base) ->
            [
                %% x2: single shift
                ?_test(
                    mul_test0(Base, a2, 2, <<
                        " 0: 0606 slli a2,a2,0x1"
                    >>)
                ),
                %% x3: (a2 << 1) + a2
                ?_test(
                    mul_test0(Base, a2, 3, <<
                        " 0: 00161f93 slli t6,a2,0x1\n"
                        " 4: 00cf8633 add a2,t6,a2"
                    >>)
                ),
                %% x4: single shift
                ?_test(
                    mul_test0(Base, a2, 4, <<
                        " 0: 060a slli a2,a2,0x2"
                    >>)
                ),
                %% x5: (a2 << 2) + a2
                ?_test(
                    mul_test0(Base, a2, 5, <<
                        " 0: 00261f93 slli t6,a2,0x2\n"
                        " 4: 00cf8633 add a2,t6,a2"
                    >>)
                ),
                %% x6: x3 then x2
                ?_test(
                    mul_test0(Base, a2, 6, <<
                        " 0: 00161f93 slli t6,a2,0x1\n"
                        " 4: 00cf8633 add a2,t6,a2\n"
                        " 8: 0606 slli a2,a2,0x1"
                    >>)
                ),
                %% x7: (a2 << 3) - a2
                ?_test(
                    mul_test0(Base, a2, 7, <<
                        " 0: 00361f93 slli t6,a2,0x3\n"
                        " 4: 40cf8633 sub a2,t6,a2"
                    >>)
                ),
                %% x8: single shift
                ?_test(
                    mul_test0(Base, a2, 8, <<
                        " 0: 060e slli a2,a2,0x3"
                    >>)
                ),
                %% x9: (a2 << 3) + a2
                ?_test(
                    mul_test0(Base, a2, 9, <<
                        " 0: 00361f93 slli t6,a2,0x3\n"
                        " 4: 00cf8633 add a2,t6,a2"
                    >>)
                ),
                %% x10: x5 then x2
                ?_test(
                    mul_test0(Base, a2, 10, <<
                        " 0: 00261f93 slli t6,a2,0x2\n"
                        " 4: 00cf8633 add a2,t6,a2\n"
                        " 8: 0606 slli a2,a2,0x1"
                    >>)
                ),
                %% x11: constant loaded, then mul
                ?_test(
                    mul_test0(Base, a2, 11, <<
                        " 0: 4fad li t6,11\n"
                        " 2: 03f60633 mul a2,a2,t6"
                    >>)
                )
            ]
        end}.

%% Primitive call whose only argument is a y register (exercises the
%% {y_reg, X} argument pattern).
set_args1_y_reg_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),

    % Same shape as
    %   {MSt2, Value} = MMod:call_primitive(MSt1, ?PRIM_BITSTRING_GET_UTF8, [{free, Src}])
    % but passing {y_reg, 5} instead of {free, Src}.
    {St1, _ResultReg} = ?BACKEND:call_primitive(St0, ?PRIM_BITSTRING_GET_UTF8, [
        {y_reg, 5}
    ]),

    Stream = ?BACKEND:stream(St1),
    % The y register is loaded straight into a0 (offsets 0x16 and 0x1a).
    Expected = dump_to_bin(<<
        " 0: 04300f93 li t6,67\n"
        " 4: 0f8a slli t6,t6,0x2\n"
        " 6: 9fb2 add t6,t6,a2\n"
        " 8: 000faf83 lw t6,0(t6)\n"
        " c: 1141 addi sp,sp,-16\n"
        " e: c006 sw ra,0(sp)\n"
        " 10: c22a sw a0,4(sp)\n"
        " 12: c42e sw a1,8(sp)\n"
        " 14: c632 sw a2,12(sp)\n"
        " 16: 01452f03 lw t5,20(a0)\n"
        " 1a: 014f2503 lw a0,20(t5)\n"
        " 1e: 9f82 jalr t6\n"
        " 20: 8faa mv t6,a0\n"
        " 22: 4082 lw ra,0(sp)\n"
        " 24: 4512 lw a0,4(sp)\n"
        " 26: 45a2 lw a1,8(sp)\n"
        " 28: 4632 lw a2,12(sp)\n"
        " 2a: 0141 addi sp,sp,16"
    >>),
    ?assertEqual(Expected, Stream).

%% Read from a large y register index (Y=123, byte offset 123 * 4 = 492).
large_y_reg_read_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, Reg} = ?BACKEND:move_to_native_register(St0, {y_reg, 123}),
    Stream = ?BACKEND:stream(St1),
    % The offset is loaded into a register and added to the base, then the
    % value is read with a zero displacement.
    Expected = dump_to_bin(<<
        " 0: 01452f03 lw t5,20(a0)\n"
        " 4: 1ec00f93 li t6,492\n"
        " 8: 9ffa add t6,t6,t5\n"
        " a: 000faf83 lw t6,0(t6)"
    >>),
    ?assertEqual(Expected, Stream),
    ?assertEqual(t6, Reg).

%% Write an immediate to a large y register index (Y=123, byte offset 492).
large_y_reg_write_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    St1 = ?BACKEND:move_to_vm_register(St0, 42, {y_reg, 123}),
    Stream = ?BACKEND:stream(St1),
    % The slot address is computed in t4 (li + add) and stored through it.
    Expected = dump_to_bin(<<
        " 0: 02a00f13 li t5,42\n"
        " 4: 01452f83 lw t6,20(a0)\n"
        " 8: 1ec00e93 li t4,492\n"
        " c: 9efe add t4,t4,t6\n"
        " e: 01eea023 sw t5,0(t4)"
    >>),
    ?assertEqual(Expected, Stream).

%% Read a large y register index when most temporaries are already taken:
%% the expected code falls back to t0 and t1.
large_y_reg_read_register_exhaustion_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    % Occupy five registers (t6, t5, t4, t3, t2 in the expected dump) by
    % loading x[0]..x[4] in order.
    {St1, _} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
    {St2, _} = ?BACKEND:move_to_native_register(St1, {x_reg, 1}),
    {St3, _} = ?BACKEND:move_to_native_register(St2, {x_reg, 2}),
    {St4, _} = ?BACKEND:move_to_native_register(St3, {x_reg, 3}),
    {St5, _} = ?BACKEND:move_to_native_register(St4, {x_reg, 4}),
    % Y=35 is byte offset 35 * 4 = 140.
    {StLast, ResultReg} = ?BACKEND:move_to_native_register(St5, {y_reg, 35}),
    Stream = ?BACKEND:stream(StLast),
    Expected = dump_to_bin(<<
        " 0: 01852f83 lw t6,24(a0)\n"
        " 4: 01c52f03 lw t5,28(a0)\n"
        " 8: 02052e83 lw t4,32(a0)\n"
        " c: 02452e03 lw t3,36(a0)\n"
        " 10: 02852383 lw t2,40(a0)\n"
        " 14: 01452283 lw t0,20(a0)\n"
        " 18: 08c00313 li t1,140\n"
        " 1c: 9316 add t1,t1,t0\n"
        " 1e: 00032303 lw t1,0(t1)"
    >>),
    ?assertEqual(Expected, Stream),
    ?assertEqual(t1, ResultReg).

%% Write to a large y register index when most temporaries are already taken:
%% the expected code falls back to t1 and t0.
large_y_reg_write_register_exhaustion_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    % The value to store comes from x[0].
    {St1, SrcReg} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
    % Occupy four more registers; the atom matches pin the allocation order.
    {St2, t5} = ?BACKEND:move_to_native_register(St1, {x_reg, 1}),
    {St3, t4} = ?BACKEND:move_to_native_register(St2, {x_reg, 2}),
    {St4, t3} = ?BACKEND:move_to_native_register(St3, {x_reg, 3}),
    {St5, t2} = ?BACKEND:move_to_native_register(St4, {x_reg, 4}),
    % Y=50 is byte offset 50 * 4 = 200.
    StLast = ?BACKEND:move_to_vm_register(St5, SrcReg, {y_reg, 50}),
    Stream = ?BACKEND:stream(StLast),
    Expected = dump_to_bin(<<
        " 0: 01852f83 lw t6,24(a0)\n"
        " 4: 01c52f03 lw t5,28(a0)\n"
        " 8: 02052e83 lw t4,32(a0)\n"
        " c: 02452e03 lw t3,36(a0)\n"
        " 10: 02852383 lw t2,40(a0)\n"
        " 14: 01452303 lw t1,20(a0)\n"
        " 18: 0c800293 li t0,200\n"
        " 1c: 929a add t0,t0,t1\n"
        " 1e: 01f2a023 sw t6,0(t0)"
    >>),
    ?assertEqual(Expected, Stream).

%% Boundary case: Y=31 (byte offset 124) is still read with a direct load
%% displacement.
y_reg_boundary_direct_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, Reg} = ?BACKEND:move_to_native_register(St0, {y_reg, 31}),
    Stream = ?BACKEND:stream(St1),
    % 31 * 4 = 124 appears directly as the lw displacement.
    Expected = dump_to_bin(<<
        " 0: 01452f03 lw t5,20(a0)\n"
        " 4: 07cf2f83 lw t6,124(t5)"
    >>),
    ?assertEqual(Expected, Stream),
    ?assertEqual(t6, Reg).

%% debugger/1 emits a single (compressed) ebreak.
debugger_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    St1 = ?BACKEND:debugger(St0),
    Expected = dump_to_bin(<<
        " 0: 9002 ebreak"
    >>),
    ?assertEqual(Expected, ?BACKEND:stream(St1)).

%% and_/3 with a negative immediate when t6..t1 are all allocated: the mask
%% is built in t0.
and_register_exhaustion_negative_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    % Take t6..t1 by loading x[0]..x[5]; the atom matches pin the order.
    {St1, t6} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
    {St2, t5} = ?BACKEND:move_to_native_register(St1, {x_reg, 1}),
    {St3, t4} = ?BACKEND:move_to_native_register(St2, {x_reg, 2}),
    {St4, t3} = ?BACKEND:move_to_native_register(St3, {x_reg, 3}),
    {St5, t2} = ?BACKEND:move_to_native_register(St4, {x_reg, 4}),
    {StFull, t1} = ?BACKEND:move_to_native_register(St5, {x_reg, 5}),
    % -4 is materialised as NOT 3 in t0, then ANDed into t6.
    {StAnd, t6} = ?BACKEND:and_(StFull, {free, t6}, -4),
    Stream = ?BACKEND:stream(StAnd),
    Expected = dump_to_bin(<<
        " 0: 01852f83 lw t6,24(a0)\n"
        " 4: 01c52f03 lw t5,28(a0)\n"
        " 8: 02052e83 lw t4,32(a0)\n"
        " c: 02452e03 lw t3,36(a0)\n"
        " 10: 02852383 lw t2,40(a0)\n"
        " 14: 02c52303 lw t1,44(a0)\n"
        " 18: 428d li t0,3\n"
        " 1a: fff2c293 not t0,t0\n"
        " 1e: 005fffb3 and t6,t6,t0"
    >>),
    ?assertEqual(Expected, Stream).
+ +and_register_exhaustion_positive_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Allocate all available registers to simulate register exhaustion + {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + {StateNoRegs, t1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), + % Test positive immediate (0x3F) which should use AND with t0 as temp + {StateResult, t6} = ?BACKEND:and_(StateNoRegs, {free, t6}, 16#3F), + Stream = ?BACKEND:stream(StateResult), + ExpectedDump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 03f00293 li t0,63\n" + " 1c: 005fffb3 and t6,t6,t0" + >>, + ?assertEqual(dump_to_bin(ExpectedDump), Stream). + +jump_table_large_labels_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 512), + Stream = ?BACKEND:stream(State1), + % RISC-V: Each jump table entry is 8 bytes (AUIPC + JALR) + ?assertEqual((512 + 1) * 8, byte_size(Stream)). 
+ +alloc_boxed_integer_fragment_small_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [ + ctx, {avm_int64_t, 42} + ]), + ?assertEqual(t6, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 03c62f83 lw t6,60(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 02a00593 li a1,42\n" + " 12: 4601 li a2,0\n" + " 14: 9f82 jalr t6\n" + " 16: 8faa mv t6,a0\n" + " 18: 4082 lw ra,0(sp)\n" + " 1a: 4512 lw a0,4(sp)\n" + " 1c: 45a2 lw a1,8(sp)\n" + " 1e: 4632 lw a2,12(sp)\n" + " 20: 0141 addi sp,sp,16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +alloc_boxed_integer_fragment_large_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [ + ctx, {avm_int64_t, 16#123456789ABCDEF0} + ]), + % Add a call primitive last to emit literal pool + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?BADMATCH_ATOM, {free, ResultReg} + ]), + ?assertEqual(t6, ResultReg), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: 03c62f83 lw t6,60(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 9abce5b7 lui a1,0x9abce\n" + " 12: ef058593 addi a1,a1,-272 # 0x9abcdef0\n" + " 16: 12345637 lui a2,0x12345\n" + " 1a: 67860613 addi a2,a2,1656 # 0x12345678\n" + " 1e: 9f82 jalr t6\n" + " 20: 8faa mv t6,a0\n" + " 22: 4082 lw ra,0(sp)\n" + " 24: 4512 lw a0,4(sp)\n" + " 26: 45a2 lw a1,8(sp)\n" + " 28: 4632 lw a2,12(sp)\n" + " 2a: 0141 addi sp,sp,16\n" + " 2c: 04c62f03 lw t5,76(a2)\n" + " 30: 03000613 li a2,48\n" + " 34: 28b00693 li a3,651\n" + " 38: 877e mv 
a4,t6\n" + " 3a: 8f02 jr t5" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test for stack alignment issue in call_func_ptr +%% RISC-V maintains 16-byte stack alignment (RISC-V calling convention) +call_func_ptr_stack_alignment_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, _ResultReg} = ?BACKEND:call_func_ptr(State4, {free, t3}, [42]), + Stream = ?BACKEND:stream(State5), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 1101 addi sp,sp,-32\n" + " 12: c006 sw ra,0(sp)\n" + " 14: c22a sw a0,4(sp)\n" + " 16: c42e sw a1,8(sp)\n" + " 18: c632 sw a2,12(sp)\n" + " 1a: c876 sw t4,16(sp)\n" + " 1c: ca7a sw t5,20(sp)\n" + " 1e: cc7e sw t6,24(sp)\n" + " 20: 02a00513 li a0,42\n" + " 24: 9e02 jalr t3\n" + " 26: 8e2a mv t3,a0\n" + " 28: 4082 lw ra,0(sp)\n" + " 2a: 4512 lw a0,4(sp)\n" + " 2c: 45a2 lw a1,8(sp)\n" + " 2e: 4632 lw a2,12(sp)\n" + " 30: 4ec2 lw t4,16(sp)\n" + " 32: 4f52 lw t5,20(sp)\n" + " 34: 4fe2 lw t6,24(sp)\n" + " 36: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +%% Test for register exhaustion issue in call_func_ptr with 5+ arguments +%% When all registers are used and we call a function with 5+ args, +%% set_args needs temporary registers but none are available +call_func_ptr_register_exhaustion_test_() -> + {setup, + fun() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + % Allocate all available registers to simulate register pressure + {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + {State6, t1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), + State6 + end, + fun(State6) -> + [ + ?_test(begin + {State7, _ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, t5}, + [ctx, jit_state, {free, t2}, 3, 1] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 1101 addi sp,sp,-32\n" + " 1a: c006 sw ra,0(sp)\n" + " 1c: c22a sw a0,4(sp)\n" + " 1e: c42e sw a1,8(sp)\n" + " 20: c632 sw a2,12(sp)\n" + " 22: c81a sw t1,16(sp)\n" + " 24: ca72 sw t3,20(sp)\n" + " 26: cc76 sw t4,24(sp)\n" + " 28: ce7e sw t6,28(sp)\n" + " 2a: 861e mv a2,t2\n" + " 2c: 468d li a3,3\n" + " 2e: 4705 li a4,1\n" + " 30: 9f02 jalr t5\n" + " 32: 8f2a mv t5,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 4342 lw t1,16(sp)\n" + " 3e: 4e52 lw t3,20(sp)\n" + " 40: 4ee2 lw t4,24(sp)\n" + " 42: 4ff2 lw t6,28(sp)\n" + " 44: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State7, 
_ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, t5}, + [ctx, jit_state, {free, t2}, 1, t1] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 1101 addi sp,sp,-32\n" + " 1a: c006 sw ra,0(sp)\n" + " 1c: c22a sw a0,4(sp)\n" + " 1e: c42e sw a1,8(sp)\n" + " 20: c632 sw a2,12(sp)\n" + " 22: c81a sw t1,16(sp)\n" + " 24: ca72 sw t3,20(sp)\n" + " 26: cc76 sw t4,24(sp)\n" + " 28: ce7e sw t6,28(sp)\n" + " 2a: 861e mv a2,t2\n" + " 2c: 4685 li a3,1\n" + " 2e: 871a mv a4,t1\n" + " 30: 9f02 jalr t5\n" + " 32: 8f2a mv t5,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 4342 lw t1,16(sp)\n" + " 3e: 4e52 lw t3,20(sp)\n" + " 40: 4ee2 lw t4,24(sp)\n" + " 42: 4ff2 lw t6,28(sp)\n" + " 44: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State7, ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, t5}, + [ctx, jit_state, {free, t2}, t1, 1] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 1101 addi sp,sp,-32\n" + " 1a: c006 sw ra,0(sp)\n" + " 1c: c22a sw a0,4(sp)\n" + " 1e: c42e sw a1,8(sp)\n" + " 20: c632 sw a2,12(sp)\n" + " 22: c81a sw t1,16(sp)\n" + " 24: ca72 sw t3,20(sp)\n" + " 26: cc76 sw t4,24(sp)\n" + " 28: ce7e sw t6,28(sp)\n" + " 2a: 861e mv a2,t2\n" + " 2c: 869a mv a3,t1\n" + " 2e: 4705 li a4,1\n" + " 30: 9f02 jalr t5\n" + " 32: 8f2a mv t5,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 4342 lw t1,16(sp)\n" + " 3e: 4e52 lw t3,20(sp)\n" + " 40: 4ee2 lw 
t4,24(sp)\n" + " 42: 4ff2 lw t6,28(sp)\n" + " 44: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(t5, ResultReg) + end), + ?_test(begin + {State7, _ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, a1}, + [t5, a3] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fd010113 addi sp,sp,-48\n" + " 1c: c006 sw ra,0(sp)\n" + " 1e: c22a sw a0,4(sp)\n" + " 20: c42e sw a1,8(sp)\n" + " 22: c632 sw a2,12(sp)\n" + " 24: c81a sw t1,16(sp)\n" + " 26: ca1e sw t2,20(sp)\n" + " 28: cc72 sw t3,24(sp)\n" + " 2a: ce76 sw t4,28(sp)\n" + " 2c: d07a sw t5,32(sp)\n" + " 2e: d27e sw t6,36(sp)\n" + " 30: 832e mv t1,a1\n" + " 32: 857a mv a0,t5\n" + " 34: 85b6 mv a1,a3\n" + " 36: 9302 jalr t1\n" + " 38: c42a sw a0,8(sp)\n" + " 3a: 4082 lw ra,0(sp)\n" + " 3c: 4512 lw a0,4(sp)\n" + " 3e: 45a2 lw a1,8(sp)\n" + " 40: 4632 lw a2,12(sp)\n" + " 42: 4342 lw t1,16(sp)\n" + " 44: 43d2 lw t2,20(sp)\n" + " 46: 4e62 lw t3,24(sp)\n" + " 48: 4ef2 lw t4,28(sp)\n" + " 4a: 5f02 lw t5,32(sp)\n" + " 4c: 5f92 lw t6,36(sp)\n" + " 4e: 03010113 addi sp,sp,48" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State7, ResultReg} = ?BACKEND:call_func_ptr( + State6, + {primitive, 2}, + [{free, t5}, a3] + ), + ?assertEqual(ResultReg, t5), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fd010113 addi sp,sp,-48\n" + " 1c: c006 sw ra,0(sp)\n" + " 1e: c22a sw a0,4(sp)\n" + " 20: c42e sw a1,8(sp)\n" + " 22: c632 sw a2,12(sp)\n" + " 24: c81a sw t1,16(sp)\n" + " 26: ca1e sw t2,20(sp)\n" + " 28: cc72 sw t3,24(sp)\n" + " 2a: ce76 sw t4,28(sp)\n" + " 2c: d07e 
sw t6,32(sp)\n" + " 2e: 00862303 lw t1,8(a2)\n" + " 32: 857a mv a0,t5\n" + " 34: 85b6 mv a1,a3\n" + " 36: 9302 jalr t1\n" + " 38: 8f2a mv t5,a0\n" + " 3a: 4082 lw ra,0(sp)\n" + " 3c: 4512 lw a0,4(sp)\n" + " 3e: 45a2 lw a1,8(sp)\n" + " 40: 4632 lw a2,12(sp)\n" + " 42: 4342 lw t1,16(sp)\n" + " 44: 43d2 lw t2,20(sp)\n" + " 46: 4e62 lw t3,24(sp)\n" + " 48: 4ef2 lw t4,28(sp)\n" + " 4a: 5f82 lw t6,32(sp)\n" + " 4c: 03010113 addi sp,sp,48" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ] + end}. + +%% Test jump_to_continuation optimization for intra-module returns +jump_to_continuation_test_() -> + [ + ?_test(begin + % Test 1: jump_to_continuation at offset 0 + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_to_continuation(State0, {free, a0}), + Stream = ?BACKEND:stream(State1), + % Expected: riscv32 PIC sequence + Dump = + << + " 0: 00000f97 auipc t6,0x0\n" + " 4: 9faa add t6,t6,a0\n" + " 6: 8f82 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + % Test 2: jump_to_continuation after jump table (non-zero relative address) + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Generate a jump table for 3 labels (4 entries * 8 bytes = 32 bytes) + State1 = ?BACKEND:jump_table(State0, 3), + State2 = ?BACKEND:jump_to_continuation(State1, {free, a0}), + Stream = ?BACKEND:stream(State2), + % Expected: jump table (32 bytes) + jump_to_continuation + % NetOffset = 0 - 32 = -32 (0xFFFFFFE0) + Dump = + << + " 0: ffffffff .insn 4, 0xffffffff\n" + " 4: ffffffff .insn 4, 0xffffffff\n" + " 8: ffffffff .insn 4, 0xffffffff\n" + " c: ffffffff .insn 4, 0xffffffff\n" + " 10: ffffffff .insn 4, 0xffffffff\n" + " 14: ffffffff .insn 4, 0xffffffff\n" + " 18: ffffffff .insn 4, 0xffffffff\n" + " 1c: ffffffff .insn 4, 0xffffffff\n" + " 20: 00000f97 auipc t6,0x0\n" + " 24: 1f81 addi t6,t6,-32 # 0x0\n" + " 26: 9faa add t6,t6,a0\n" + " 28: 8f82 jr t6" + >>, + 
?assertEqual(dump_to_bin(Dump), Stream) + end) + ]. + +%% Mimic part of add.beam +add_beam_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 3), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:move_to_vm_register(State2, 16#9f, {x_reg, 1}), + State4 = ?BACKEND:move_to_vm_register(State3, 16#8f, {x_reg, 0}), + State5 = ?BACKEND:call_only_or_schedule_next(State4, 2), + State6 = ?BACKEND:add_label(State5, 2), + {State7, ResultReg} = ?BACKEND:call_primitive(State6, ?PRIM_ALLOCATE, [ + ctx, jit_state, 1, 0, 1 + ]), + State8 = ?BACKEND:if_block(State7, {'(bool)', {free, ResultReg}, '==', false}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset]) + end), + State9 = ?BACKEND:move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), + State10 = ?BACKEND:call_or_schedule_next(State9, 3), + State11 = ?BACKEND:add_label(State10, 3), + State12 = ?BACKEND:call_primitive_last(State11, ?PRIM_RETURN, [ + ctx, jit_state + ]), + % OP_INT_CALL_END + State13 = ?BACKEND:add_label(State12, 0), + State14 = ?BACKEND:call_primitive_last(State13, 1, [ctx, jit_state]), + State15 = ?BACKEND:update_branches(State14), + Stream = ?BACKEND:stream(State15), + Dump = + << + % jump table (new 8-byte format) + " 0: 00000697 auipc a3,0x0\n" + " 4: 0e068067 jr 224(a3) # 0xe0\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 01868067 jr 24(a3) # 0x20\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 04868067 jr 72(a3) # 0x58\n" + " 18: 00000697 auipc a3,0x0\n" + " 1c: 0c268067 jr 194(a3) # 0xda\n" + % label 1 + % {move,{integer,9},{x,1}}. + " 20: 09f00f93 li t6,159\n" + " 24: 01f52e23 sw t6,28(a0)\n" + % {move,{integer,8},{x,0}} + " 28: 08f00f93 li t6,143\n" + " 2c: 01f52c23 sw t6,24(a0)\n" + % {call_only,2,{f,2}}. 
+ " 30: 0085af83 lw t6,8(a1)\n" + " 34: 1ffd addi t6,t6,-1\n" + " 36: 01f5a423 sw t6,8(a1)\n" + " 3a: 000f8663 beqz t6,0x46\n" + " 3e: a829 j 0x58\n" + " 40: 0001 nop\n" + " 42: 00000013 nop\n" + " 46: 00000f97 auipc t6,0x0\n" + " 4a: 0fc9 addi t6,t6,18 # 0x58\n" + " 4c: 0001 nop\n" + " 4e: 01f5a223 sw t6,4(a1)\n" + " 52: 00862f83 lw t6,8(a2)\n" + " 56: 8f82 jr t6\n" + % label 2 + % {allocate,1,1}. + " 58: 01462f83 lw t6,20(a2)\n" + " 5c: 1141 addi sp,sp,-16\n" + " 5e: c006 sw ra,0(sp)\n" + " 60: c22a sw a0,4(sp)\n" + " 62: c42e sw a1,8(sp)\n" + " 64: c632 sw a2,12(sp)\n" + " 66: 4605 li a2,1\n" + " 68: 4681 li a3,0\n" + " 6a: 4705 li a4,1\n" + " 6c: 9f82 jalr t6\n" + " 6e: 8faa mv t6,a0\n" + " 70: 4082 lw ra,0(sp)\n" + " 72: 4512 lw a0,4(sp)\n" + " 74: 45a2 lw a1,8(sp)\n" + " 76: 4632 lw a2,12(sp)\n" + " 78: 0141 addi sp,sp,16\n" + " 7a: 01ff9f13 slli t5,t6,0x1f\n" + " 7e: 000f4763 bltz t5,0x8c\n" + " 82: 01862f83 lw t6,24(a2)\n" + " 86: 08600613 li a2,134\n" + " 8a: 8f82 jr t6\n" + % {init_yregs,{list,[{y,0}]}}. 
+ %% move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), + " 8c: 03b00f13 li t5,59\n" + " 90: 01452f83 lw t6,20(a0)\n" + " 94: 01efa023 sw t5,0(t6)\n" + % {call,1,{f,3}} + %% call_or_schedule_next(State9, 3), + " 98: 0005af03 lw t5,0(a1)\n" + " 9c: 000f2f03 lw t5,0(t5)\n" + " a0: 0f62 slli t5,t5,0x18\n" + " a2: 36800f93 li t6,872\n" + " a6: 00000013 nop\n" + " aa: 01ff6f33 or t5,t5,t6\n" + " ae: 05e52e23 sw t5,92(a0)\n" + " b2: 0085af83 lw t6,8(a1)\n" + " b6: 1ffd addi t6,t6,-1\n" + " b8: 01f5a423 sw t6,8(a1)\n" + " bc: 000f8663 beqz t6,0xc8\n" + " c0: a829 j 0xda\n" + " c2: 0001 nop\n" + " c4: 00000013 nop\n" + " c8: 00000f97 auipc t6,0x0\n" + " cc: 0fc9 addi t6,t6,18 # 0xda\n" + " ce: 0001 nop\n" + " d0: 01f5a223 sw t6,4(a1)\n" + " d4: 00862f83 lw t6,8(a2)\n" + " d8: 8f82 jr t6\n" + %% (continuation) + % label 3 + " da: 00462f83 lw t6,4(a2)\n" + " de: 8f82 jr t6\n" + % label 0 + " e0: 00462f83 lw t6,4(a2)\n" + " e4: 8f82 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +dump_to_bin(Dump) -> + dump_to_bin0(Dump, addr, []). + +-define(IS_HEX_DIGIT(C), + ((C >= $0 andalso C =< $9) orelse (C >= $a andalso C =< $f) orelse (C >= $A andalso C =< $F)) +). 
+ +dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> + dump_to_bin0(Tail, hex, Acc); +dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\n, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\s, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\t, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\s, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, hex, Acc); +dump_to_bin0(<<$\t, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, hex, Acc); +%% Handle RISC-V 32-bit instructions (8 consecutive hex digits) +dump_to_bin0(<>, hex, Acc) when + (Sp =:= $\t orelse Sp =:= $\s) andalso + ?IS_HEX_DIGIT(H1) andalso + ?IS_HEX_DIGIT(H2) andalso + ?IS_HEX_DIGIT(H3) andalso + ?IS_HEX_DIGIT(H4) andalso + ?IS_HEX_DIGIT(H5) andalso + ?IS_HEX_DIGIT(H6) andalso + ?IS_HEX_DIGIT(H7) andalso + ?IS_HEX_DIGIT(H8) +-> + %% RISC-V instructions are 32-bit little-endian + Instr = list_to_integer([H1, H2, H3, H4, H5, H6, H7, H8], 16), + dump_to_bin0(Rest, instr, [<> | Acc]); +%% Handle 32-bits undefined instruction (ARM format with space: "1234 5678") +dump_to_bin0(<>, hex, Acc) when + (Sp =:= $\t orelse Sp =:= $\s) andalso + ?IS_HEX_DIGIT(H1) andalso + ?IS_HEX_DIGIT(H2) andalso + ?IS_HEX_DIGIT(H3) andalso + ?IS_HEX_DIGIT(H4) andalso + ?IS_HEX_DIGIT(H5) andalso + ?IS_HEX_DIGIT(H6) andalso + ?IS_HEX_DIGIT(H7) andalso + ?IS_HEX_DIGIT(H8) +-> + InstrA = list_to_integer([H1, H2, H3, H4], 16), + InstrB = list_to_integer([H5, H6, H7, H8], 16), + dump_to_bin0(Rest, instr, [<>, <> | Acc]); +%% Handle 16-bit ARM32 Thumb instructions (4 hex digits) +dump_to_bin0(<>, hex, Acc) when + (Sp =:= $\t orelse Sp =:= $\s) andalso + ?IS_HEX_DIGIT(H1) andalso + ?IS_HEX_DIGIT(H2) andalso + ?IS_HEX_DIGIT(H3) andalso + ?IS_HEX_DIGIT(H4) +-> + %% Parse 4 hex digits (ARM32 Thumb 16-bit instruction) + Instr = list_to_integer([H1, H2, H3, H4], 16), + dump_to_bin0(Rest, instr, [<> | 
Acc]); +dump_to_bin0(<<$\n, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\n, Tail/binary>>, instr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<_Other, Tail/binary>>, instr, Acc) -> + dump_to_bin0(Tail, instr, Acc); +dump_to_bin0(<<>>, _, Acc) -> + list_to_binary(lists:reverse(Acc)). diff --git a/tests/libs/jit/jit_tests_common.erl b/tests/libs/jit/jit_tests_common.erl index cfabfcf15f..cf989e746d 100644 --- a/tests/libs/jit/jit_tests_common.erl +++ b/tests/libs/jit/jit_tests_common.erl @@ -77,6 +77,8 @@ asm(Arch, Bin, Str) -> find_binutils(Arch) -> ArchStr = atom_to_list(Arch), BinutilsList = [ + {ArchStr ++ "-esp-elf-as", ArchStr ++ "-esp-elf-objdump"}, + {ArchStr ++ "-unknown-elf-as", ArchStr ++ "-unknown-elf-objdump"}, {ArchStr ++ "-elf-as", ArchStr ++ "-elf-objdump"}, {ArchStr ++ "-none-eabi-as", ArchStr ++ "-none-eabi-objdump"}, {ArchStr ++ "-linux-gnu-as", ArchStr ++ "-linux-gnu-objdump"} @@ -104,6 +106,8 @@ get_asm_header(arm) -> get_asm_header(aarch64) -> ".text\n"; get_asm_header(x86_64) -> + ".text\n"; +get_asm_header(riscv32) -> ".text\n". %% Get architecture-specific assembler flags @@ -113,7 +117,9 @@ get_as_flags(arm) -> get_as_flags(aarch64) -> ""; get_as_flags(x86_64) -> - "--64". + "--64"; +get_as_flags(riscv32) -> + "-march=rv32imac". %% Parse objdump output lines and extract binary data -spec asm_lines([binary()], binary(), atom()) -> binary(). diff --git a/tests/libs/jit/tests.erl b/tests/libs/jit/tests.erl index ff272f6eac..2d130cad03 100644 --- a/tests/libs/jit/tests.erl +++ b/tests/libs/jit/tests.erl @@ -31,6 +31,8 @@ start() -> jit_aarch64_asm_tests, jit_armv6m_tests, jit_armv6m_asm_tests, + jit_riscv32_tests, + jit_riscv32_asm_tests, jit_x86_64_tests, jit_x86_64_asm_tests ]). 
diff --git a/tests/test.c b/tests/test.c index 8afb649718..f1024f27ed 100644 --- a/tests/test.c +++ b/tests/test.c @@ -716,6 +716,11 @@ int test_modules_execution(bool beam, bool skip, int count, char **item) perror("Error: cannot find armv6m directory"); return EXIT_FAILURE; } +#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32 + if (chdir("riscv32") != 0) { + perror("Error: cannot find riscv32 directory"); + return EXIT_FAILURE; + } #else #error Unknown JIT target #endif