From 9def6515c6f7b5a03350b548502c713a31cb7570 Mon Sep 17 00:00:00 2001 From: Q Liu <52538137+LIUQyou@users.noreply.github.com> Date: Sun, 30 Mar 2025 20:29:42 +0200 Subject: [PATCH] update benchmark and errors --- test/test_script_audioprocessing.sh | 47 ++ test/test_script_deeplearning.sh | 221 ++++++ test/test_script_geminiprocessing.sh | 97 +++ test/test_script_imageprocessing.sh | 58 ++ test/test_script_vectorizationprocessing.sh | 38 + .../build_results_crosscompile_summary.log | 23 + .../deeplearning/build_results_summary.log | 29 + .../deeplearning/dl-layer-ffn-benchmark.log | 18 + .../dl-layer-rmsnorm-benchmark.log | 18 + .../dl-layer-selfattention-benchmark.log | 18 + .../deeplearning/dl-model-lenet-benchmark.log | 19 + .../dl-model-mobilenetv3-benchmark.log | 19 + .../dl-model-resnet18-benchmark.log | 18 + .../dl-model-tinyllama-benchmark.log | 19 + .../dl-model-whisper-benchmark.log | 19 + .../dl-op-linalg-arithaddf-benchmark.log | 19 + .../dl-op-linalg-arithdivf-benchmark.log | 19 + .../dl-op-linalg-arithmulf-benchmark.log | 19 + .../dl-op-linalg-arithnegf-benchmark.log | 19 + .../dl-op-linalg-arithsubf-benchmark.log | 19 + .../dl-op-linalg-batch-matmul-benchmark.log | 25 + ...l-op-linalg-conv2d-nchw-fchw-benchmark.log | 19 + ...l-op-linalg-conv2d-nhwc-fhwc-benchmark.log | 21 + ...l-op-linalg-conv2d-nhwc-hwcf-benchmark.log | 19 + ...g-depthwise-conv-2d-nhwc-hwc-benchmark.log | 19 + .../dl-op-linalg-mathexp-benchmark.log | 19 + .../dl-op-linalg-mathfpow-benchmark.log | 19 + .../dl-op-linalg-mathrsqrt-benchmark.log | 19 + .../dl-op-linalg-matmul-benchmark.log | 22 + ...l-op-linalg-pooling-nhwc-sum-benchmark.log | 19 + .../dl-op-linalg-reduceaddf-benchmark.log | 10 + .../dl-op-linalg-reducemaxf-benchmark.log | 10 + ...p-linalg-softmax-exp-sum-div-benchmark.log | 19 + .../dl-op-matmul-transpose-b-benchmark.log | 21 + .../dl-op-tosa-transpose-benchmark.log | 17 + .../deeplearning/run_results_summary.log | 29 + test_result/geminiprocessing/build.log | 655 ++++++++++++++++++ .../geminiprocessing/cmake_configure.log | 37 + 38 files changed, 1755 insertions(+) create mode 100755 test/test_script_audioprocessing.sh create mode 100755 test/test_script_deeplearning.sh create mode 100755 test/test_script_geminiprocessing.sh create mode 100755 test/test_script_imageprocessing.sh create mode 100755 test/test_script_vectorizationprocessing.sh create mode 100644 test_result/deeplearning/build_results_crosscompile_summary.log create mode 100644 test_result/deeplearning/build_results_summary.log create mode 100644 test_result/deeplearning/dl-layer-ffn-benchmark.log create mode 100644 test_result/deeplearning/dl-layer-rmsnorm-benchmark.log create mode 100644 test_result/deeplearning/dl-layer-selfattention-benchmark.log create mode 100644 test_result/deeplearning/dl-model-lenet-benchmark.log create mode 100644 test_result/deeplearning/dl-model-mobilenetv3-benchmark.log create mode 100644 test_result/deeplearning/dl-model-resnet18-benchmark.log create mode 100644 test_result/deeplearning/dl-model-tinyllama-benchmark.log create mode 100644 test_result/deeplearning/dl-model-whisper-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-matmul-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log create mode 100644 test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log create mode 100644 test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log create mode 100644 test_result/deeplearning/dl-op-tosa-transpose-benchmark.log create mode 100644 test_result/deeplearning/run_results_summary.log create mode 100644 test_result/geminiprocessing/build.log create mode 100644 test_result/geminiprocessing/cmake_configure.log diff --git a/test/test_script_audioprocessing.sh b/test/test_script_audioprocessing.sh new file mode 100755 index 00000000..e16ae655 --- /dev/null +++ b/test/test_script_audioprocessing.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +export BUDDY_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/build +export LLVM_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/llvm/build +cd /home/buddy-complier-workspace/buddy-benchmark +mkdir -p build && cd build +cmake -G Ninja .. \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DAUDIO_PROCESSING_BENCHMARKS=ON \ + -DCMAKE_CXX_COMPILER=${LLVM_MLIR_BUILD_DIR}/bin/clang++ \ + -DKFR_DIR=/home/buddy-complier-workspace/buddy-benchmark/thirdparty/kfr \ + -DBUDDY_MLIR_BUILD_DIR=${BUDDY_MLIR_BUILD_DIR} +ninja dap-op-iir-benchmark +cd bin +./dap-op-iir-benchmark + + + +cmake -G Ninja .. \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DAUDIO_PROCESSING_BENCHMARKS=ON \ + -DCMAKE_CXX_COMPILER=${LLVM_MLIR_BUILD_DIR}/bin/clang++ \ + -DKFR_DIR=/home/buddy-complier-workspace/buddy-benchmark/thirdparty/kfr \ + -DBUDDY_MLIR_BUILD_DIR=${BUDDY_MLIR_BUILD_DIR} \ + -DPYTHON_BINARY_DIR="$(dirname "$(which python3)")" + +ninja audio-plot +cd bin +./audio-plot ../../benchmarks/AudioProcessing/Audios/NASA_Mars.wav ResultKFRIir.wav +# " +# root@4f445bb41579:/home/buddy-complier-workspace/buddy-benchmark/build/bin# ./audio-plot ../../benchmarks/AudioProcessing/Audios/NASA_Mars.wav ResultKFRIir.wav +# Plotting now... +# Traceback (most recent call last): +# File "/home/buddy-complier-workspace/buddy-benchmark/utils/plots/python/plot.py", line 71, in +# compare_wave(args.file1, args.file2, part=args.part, +# File "/home/buddy-complier-workspace/buddy-benchmark/utils/plots/python/plotools/compare.py", line 120, in compare_wave +# after, time2 = get_time_domain(file2) +# File "/home/buddy-complier-workspace/buddy-benchmark/utils/plots/python/plotools/compare.py", line 60, in get_time_domain +# info, samples = get_info_and_samples(file) +# File "/home/buddy-complier-workspace/buddy-benchmark/utils/plots/python/plotools/compare.py", line 38, in get_info_and_samples +# with wave.open(file, 'rb') as audio: +# File "/usr/lib/python3.10/wave.py", line 509, in open +# return Wave_read(f) +# File "/usr/lib/python3.10/wave.py", line 159, in __init__ +# f = builtins.open(f, 'rb') +# FileNotFoundError: [Errno 2] No such file or directory: 'ResultKFRIir.wav' +# " \ No newline at end of file diff --git a/test/test_script_deeplearning.sh b/test/test_script_deeplearning.sh new file mode 100755 index 00000000..f7c4d72e --- /dev/null +++ b/test/test_script_deeplearning.sh @@ -0,0 +1,221 @@ +#!/usr/bin/env bash + +################################################################################ +# 0. Script Setup +################################################################################ +# We disable "exit on error" so that if one benchmark fails to build or run, +# we can continue with the rest. +set +e + +################################################################################ +# 1. (Optional) Activate Python/Conda Environment +################################################################################ +# Uncomment or adjust if you use Anaconda/Miniconda: +# conda activate + + +################################################################################ +# 2. Build Each Benchmark (Continue Even If One Fails) +################################################################################ +BENCHMARK_TARGETS=( + # ------------------ + # Model-Level + # ------------------ + "dl-model-tinyllama-benchmark" + "dl-model-mobilenetv3-benchmark" + "dl-model-lenet-benchmark" + "dl-model-bert-benchmark" + "dl-model-whisper-benchmark" + "dl-model-resnet18-benchmark" + + # ------------------ + # Layer-Level + # ------------------ + "dl-layer-ffn-benchmark" + "dl-layer-selfattention-benchmark" + "dl-layer-rmsnorm-benchmark" + + # ------------------ + # Operation-Level + # ------------------ + "dl-op-linalg-matmul-benchmark" + "dl-op-linalg-conv2d-nchw-fchw-benchmark" + "dl-op-linalg-conv2d-nhwc-hwcf-benchmark" + "dl-op-linalg-conv2d-nhwc-fhwc-benchmark" + "dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark" + "dl-op-linalg-pooling-nhwc-sum-benchmark" + "dl-op-linalg-batch-matmul-benchmark" + "dl-op-linalg-arithaddf-benchmark" + "dl-op-linalg-arithdivf-benchmark" + "dl-op-linalg-arithmulf-benchmark" + "dl-op-linalg-arithnegf-benchmark" + "dl-op-linalg-arithsubf-benchmark" + "dl-op-linalg-mathfpow-benchmark" + "dl-op-linalg-mathrsqrt-benchmark" + "dl-op-linalg-mathexp-benchmark" + "dl-op-linalg-reduceaddf-benchmark" + "dl-op-linalg-reducemaxf-benchmark" + "dl-op-linalg-softmax-exp-sum-div-benchmark" + "dl-op-tosa-transpose-benchmark" + "dl-op-matmul-transpose-b-benchmark" +) + + +################################################################################ +# 3. Set Environment Variables for Buddy MLIR/LLVM +################################################################################ +# Adjust these paths according to your local setup: +BUDDY_MLIR_DIR="/home/buddy-complier-workspace/buddy-mlir" # The root directory of buddy-mlir +LLVM_BUILD_DIR="$BUDDY_MLIR_DIR/llvm/build" # The build dir for LLVM +BUDDY_BUILD_DIR="$BUDDY_MLIR_DIR/build" # The build dir for buddy-mlir + +# Export environment variables: +export BUDDY_MLIR_BUILD_DIR="$BUDDY_BUILD_DIR" +export LLVM_MLIR_BUILD_DIR="$LLVM_BUILD_DIR" +export PYTHONPATH="${LLVM_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_BUILD_DIR}/python_packages:${PYTHONPATH}" +export BENCHMARK_PATH="${BUDDY_MLIR_DIR}/../buddy-benchmark" +echo "[Info] BUDDY_MLIR_BUILD_DIR = ${BUDDY_MLIR_BUILD_DIR}" +echo "[Info] LLVM_MLIR_BUILD_DIR = ${LLVM_MLIR_BUILD_DIR}" +echo "[Info] PYTHONPATH = ${PYTHONPATH}" + +################################################################################ +# 3. Prepare Build Folder and Run CMake +################################################################################ +cd "${BUDDY_MLIR_DIR}/../buddy-benchmark" || exit 1 +mkdir -p build +cd build || exit 1 + +echo "[Info] Running CMake configuration..." +cmake -G Ninja .. \ + -DDEEP_LEARNING_BENCHMARKS=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + -DBUDDY_MLIR_BUILD_DIR="${BUDDY_MLIR_BUILD_DIR}" \ + -DCMAKE_CXX_COMPILER="${LLVM_MLIR_BUILD_DIR}/bin/clang++" \ + -DCMAKE_C_COMPILER="${LLVM_MLIR_BUILD_DIR}/bin/clang" \ + -DCMAKE_CXX_FLAGS="-march=native" \ + -DCMAKE_C_FLAGS="-march=native" + + +################################################################################ +# 4. Prepare Build Folder and Run CMake +################################################################################ + +mkdir -p $BENCHMARK_PATH/test_result +mkdir -p $BENCHMARK_PATH/test_result/deeplearning +BUILD_LOG="${BENCHMARK_PATH}/test_result/deeplearning/build_results_summary.log" +> "${BUILD_LOG}" # Clear/create the file + +echo "[Info] Building all benchmarks with Ninja..." +for target in "${BENCHMARK_TARGETS[@]}"; do + echo "==> ninja ${target}" + if ninja "${target}"; then + echo "[Success] Build of '${target}'" | tee -a "${BUILD_LOG}" + else + echo "[Failed] Build of '${target}'" | tee -a "${BUILD_LOG}" + fi +done + +################################################################################ +# 5. Run Each Benchmark & Redirect Output (Continue Even If One Fails) +################################################################################ +cd bin || exit 1 + +RUN_LOG="${BENCHMARK_PATH}/test_result/deeplearning/run_results_summary.log" +> "${RUN_LOG}" # Clear/create the file + +echo "[Info] Running all benchmarks in ./bin..." +for target in "${BENCHMARK_TARGETS[@]}"; do + if [ -f "${target}" ]; then + echo "==> Running ${target}" + if "./${target}" > "${BENCHMARK_PATH}/test_result/deeplearning/${target}.log" 2>&1; then + echo "[Success] Run of '${target}'" | tee -a "${RUN_LOG}" + echo " Output saved to test_result/deeplearning/${target}.log" + else + echo "[Failed] Run of '${target}'" | tee -a "${RUN_LOG}" + echo " Output saved to test_result/deeplearning/${target}.log (May contain error info)" + fi + else + echo "[Missing] Executable not found for '${target}'" | tee -a "${RUN_LOG}" + fi +done + + +################################################################################ +# 6. Set Environment Variables for Buddy MLIR/LLVM for cross-compile +################################################################################ +# Adjust these paths according to your local setup: +BUDDY_MLIR_DIR="/home/buddy-complier-workspace/buddy-mlir" # The root directory of buddy-mlir +LLVM_BUILD_DIR="$BUDDY_MLIR_DIR/llvm/build" # The build dir for LLVM +BUDDY_BUILD_DIR="$BUDDY_MLIR_DIR/build" # The build dir for buddy-mlir + +# Export environment variables: +export BUDDY_MLIR_BUILD_DIR="$BUDDY_BUILD_DIR" +export LLVM_MLIR_BUILD_DIR="$LLVM_BUILD_DIR" +export PYTHONPATH="${LLVM_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_BUILD_DIR}/python_packages:${PYTHONPATH}" +export BUDDY_MLIR_BUILD_CROSS_DIR=${BUDDY_MLIR_BUILD_DIR}/../build +export RISCV_GNU_TOOLCHAIN=${BUDDY_MLIR_BUILD_DIR}/../thirdparty/riscv-gnu-toolchain +export RISCV_OMP_SHARED=${LLVM_MLIR_BUILD_DIR}/../build/lib/libomp.so +export BENCHMARK_PATH="${BUDDY_MLIR_DIR}/../buddy-benchmark" + +echo "[Info] BUDDY_MLIR_BUILD_DIR = ${BUDDY_MLIR_BUILD_DIR}" +echo "[Info] LLVM_MLIR_BUILD_DIR = ${LLVM_MLIR_BUILD_DIR}" +echo "[Info] PYTHONPATH = ${PYTHONPATH}" + +################################################################################ +# 7. Prepare Build Folder and Run CMake +################################################################################ +cd "${BUDDY_MLIR_DIR}/../buddy-benchmark" || exit 1 +mkdir -p build +cd build || exit 1 + +echo "[Info] Running CMake configuration..." +cmake -G Ninja .. \ + -DDEEP_LEARNING_BENCHMARKS=ON \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + -DCROSS_COMPILE_RVV=ON \ + -DCMAKE_SYSTEM_NAME=Linux \ + -DCMAKE_SYSTEM_PROCESSOR=riscv \ + -DCMAKE_C_COMPILER=${LLVM_MLIR_BUILD_DIR}/bin/clang \ + -DRISCV_GNU_TOOLCHAIN=${RISCV_GNU_TOOLCHAIN} \ + -DCMAKE_CXX_COMPILER=${LLVM_MLIR_BUILD_DIR}/bin/clang++ \ + -DCMAKE_C_FLAGS="-march=rv64gcv --target=riscv64-unknown-linux-gnu --sysroot=${RISCV_GNU_TOOLCHAIN}/sysroot --gcc-toolchain=${RISCV_GNU_TOOLCHAIN} -fPIC" \ + -DCMAKE_CXX_FLAGS="-march=rv64gcv --target=riscv64-unknown-linux-gnu --sysroot=${RISCV_GNU_TOOLCHAIN}/sysroot --gcc-toolchain=${RISCV_GNU_TOOLCHAIN} -fPIC" \ + -DRISCV_OMP_SHARED=${RISCV_OMP_SHARED} \ + -DBUDDY_MLIR_BUILD_DIR=${BUDDY_MLIR_BUILD_DIR} \ + -DBUDDY_MLIR_BUILD_CROSS_DIR=${BUDDY_MLIR_BUILD_CROSS_DIR} \ + -DBUDDY_MLIR_CROSS_LIB_DIR=${BUDDY_MLIR_BUILD_CROSS_DIR}/lib + +################################################################################ +# 8. Prepare Build Folder and Run CMake for cross-compile +################################################################################ + +mkdir -p $BENCHMARK_PATH/test_result +BUILD_LOG="${BENCHMARK_PATH}/test_result/deeplearning/build_results_crosscompile_summary.log" +> "${BUILD_LOG}" # Clear/create the file + +echo "[Info] Building all benchmarks with Ninja..." +for target in "${BENCHMARK_TARGETS[@]}"; do + echo "==> ninja ${target}" + if ninja "${target}"; then + echo "[Success] Build of '${target}'" | tee -a "${BUILD_LOG}" + else + echo "[Failed] Build of '${target}'" | tee -a "${BUILD_LOG}" + fi +done + + +echo +echo "[Info] All build/run steps completed (script did not stop on failures)." +echo "[Info] Build summary: ${BUILD_LOG}" +echo "[Info] Run summary: ${RUN_LOG}" + + +cmake -G Ninja .. \ + -DMLIR_DIR=$PWD/../llvm/build/lib/cmake/mlir \ + -DLLVM_DIR=$PWD/../llvm/build/lib/cmake/llvm \ + -DLLVM_ENABLE_ASSERTIONS=ON \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DBUDDY_MLIR_ENABLE_PYTHON_PACKAGES=ON \ + -DPython3_EXECUTABLE=$(which python3) \ No newline at end of file diff --git a/test/test_script_geminiprocessing.sh b/test/test_script_geminiprocessing.sh new file mode 100755 index 00000000..b151cb5b --- /dev/null +++ b/test/test_script_geminiprocessing.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash + +export BUDDY_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/build +export LLVM_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/llvm/build +export CHIPYARD_DIR=/home/buddy-complier-workspace/chipyard +export BUDDY_BENCHMARK_DIR=/home/buddy-complier-workspace/buddy-benchmark + +cd "${CHIPYARD_DIR}" +git config --global --add safe.directory /home/buddy-complier-workspace/chipyard +git checkout 1.8.1 + +# Initialize and update the 'generators/gemmini' submodule and any submodules inside it. +git config --global --add safe.directory /home/buddy-complier-workspace/chipyard/generators/gemmini +git submodule update --init --recursive generators/gemmini + +############################################# +# 1. Initialize Conda for the current shell +############################################# +conda init bash # or "conda init" if you’re already in a bash shell + +############################################# +# 2. Check if 'chipyard' environment exists +############################################# +if conda env list | grep -qE '^[^ ]*\s+chipyard\s'; then + echo "[INFO] Found existing 'chipyard' environment. Activating it." +else + echo "[INFO] 'chipyard' environment not found. Creating it..." + # Example creation command - adjust packages as needed + conda create -y -n chipyard python=3.10 \ + cmake ninja \ + # plus any other dependencies needed... +fi + +conda activate chipyard + +############################################# +# 3. Source build-setup and env.sh +############################################# +# If your script uses conda-lock or has pinned requirements, +# you might need to call build-setup.sh so it *creates* the +# .conda-env environment. But be sure it doesn’t conflict +# with your newly created 'chipyard' environment. +source build-setup.sh esp-tools +source env.sh + +############################################# +# 4. Proceed with your build +############################################# +cd "${BUDDY_BENCHMARK_DIR}" +rm -rf build +# Remove any existing build directory and create a fresh one. +mkdir -p build && cd build + +RESULT_DIR="${BUDDY_BENCHMARK_DIR}/test_result/geminiprocessing" +mkdir -p "${RESULT_DIR}" + +export C_PATH=$(which riscv64-unknown-linux-gnu-gcc) +export CXX_PATH=$(which riscv64-unknown-linux-gnu-g++) +export CLinker_PATH=$(which riscv64-unknown-linux-gnu-ld) + +# Print Address here +echo "[Info] C_COMPILER_PATH = ${C_PATH}" +echo "[Info] CXX_COMPILER_PATH = ${CXX_PATH}" +echo "[Info] C_LINKER_PATH = ${CLinker_PATH}" +echo "[Info] BUDDY_MLIR_BUILD_DIR = ${BUDDY_MLIR_BUILD_DIR}" +echo "[Info] LLVM_MLIR_BUILD_DIR = ${LLVM_MLIR_BUILD_DIR}" +echo "[Info] CHIPYARD_DIR = ${CHIPYARD_DIR}" +echo "[Info] BUDDY_BENCHMARK_DIR = ${BUDDY_BENCHMARK_DIR}" +echo "[Info] RESULT_DIR = ${RESULT_DIR}" + +echo "[Info] Running CMake configuration..." +cmake -G Ninja .. \ + -DCMAKE_C_COMPILER=${C_PATH} \ + -DCMAKE_CXX_COMPILER=${CXX_PATH} \ + -DCMAKE_LINKER=${CLinker_PATH} \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DBUDDY_MLIR_BUILD_DIR=${BUDDY_MLIR_BUILD_DIR} \ + -DGEMMINI_INCLUDE_DIR=${CHIPYARD_DIR}/generators/gemmini/software/gemmini-rocc-tests/include/ \ + -DGEMMINI_BENCHMARKS=ON \ + 2>&1 | tee "${RESULT_DIR}/cmake_configure.log" + +ninja 2>&1 | tee "${RESULT_DIR}/build.log" + +# ```[1/21] Creating directories for 'project_googlebenchmark' +# [2/21] Building C object benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o +# FAILED: benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o +# riscv64-unknown-linux-gnu-gcc -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../frontend/Interfaces -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../thirdparty/include -I/home/buddy-complier-workspace/buddy-benchmark/benchmarks -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/.. -I/home/xychen/buddy-mlir/frontend/Interfaces -O3 -DNDEBUG -MD -MT benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -MF benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o.d -o benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -c /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c +# /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c: In function '_exo_matmul_4': +# /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:47: error: macro "gemmini_extended_config_ex" requires 7 arguments, but only 6 given +# 28 | gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0); +# | ^ +# In file included from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23:``` + +# cd bin +# ./vectorization-matrix-benchmark 2>&1 | tee "${RESULT_DIR}/run.log" + +echo "[Info] CMake, build, and run logs are stored in ${RESULT_DIR}" diff --git a/test/test_script_imageprocessing.sh b/test/test_script_imageprocessing.sh new file mode 100755 index 00000000..ab89c912 --- /dev/null +++ b/test/test_script_imageprocessing.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +# NEW: Create results directory and update log file path +RESULT_DIR="${PWD}/test_result/imageprocessing" +mkdir -p "$RESULT_DIR" +LOG="${RESULT_DIR}/image-processing-result.log" +echo "Benchmark results - $(date)" > "$LOG" + +# Function to check CPU flag support +supports() { + local flag=$(echo "$1" | tr '[:upper:]' '[:lower:]') + if grep -qi "$flag" /proc/cpuinfo; then + return 0 + else + return 1 + fi +} + +features=("SSE" "AVX2" "AVX512" "NEON") +images=("../benchmarks/ImageProcessing/Images/YuTu.png") +kernels=("prewittKernelAlign" "sobel3x3KernelAlign" "sobel5x5KernelAlign" "sobel7x7KernelAlign" "sobel9x9KernelAlign" "laplacianKernelAlign" "logKernelAlign") +kernelmorphs=("random3x3KernelAlignInt") +boundaries=("CONSTANT_PADDING" "REPLICATE_PADDING") + +for feature in "${features[@]}"; do + echo "Testing $feature support" | tee -a "$LOG" + if supports "$feature"; then + echo "$feature is supported." | tee -a "$LOG" + mkdir -p build_${feature} && cd build_${feature} + cmake -G Ninja .. \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DIMAGE_PROCESSING_BENCHMARKS=ON \ + -DOpenCV_DIR=$PWD/../thirdparty/opencv/build/ \ + -DEIGEN_DIR=$PWD/../thirdparty/eigen/ \ + -DBUDDY_OPT_ATTR=$(echo "$feature" | tr '[:upper:]' '[:lower:]') \ + -DBUDDY_MLIR_BUILD_DIR=/home/buddy-complier-workspace/buddy-mlir/build + ninja image-processing-benchmark + echo "Running image-processing-benchmark for $feature" | tee -a "$LOG" + for img in "${images[@]}"; do + for kern in "${kernels[@]}"; do + for morph in "${kernelmorphs[@]}"; do + for boundary in "${boundaries[@]}"; do + echo "Running: $img $kern $morph $boundary" | tee -a "$LOG" + ./bin/image-processing-benchmark "$img" "$kern" "$morph" "$boundary" 2>&1 | grep -v "Saved PNG file." >> "$LOG" + done + done + done + done + cd .. + else + echo "CPU does not support $feature." | tee -a "$LOG" + fi +done + +# NEW: Clean up build directories +for feature in "${features[@]}"; do + rm -rf "build_${feature}" +done \ No newline at end of file diff --git a/test/test_script_vectorizationprocessing.sh b/test/test_script_vectorizationprocessing.sh new file mode 100755 index 00000000..97899922 --- /dev/null +++ b/test/test_script_vectorizationprocessing.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +################################################################################ +# 1. Script Setup +################################################################################ +set -e +BUDDY_MLIR_BUILD_DIR="/home/buddy-complier-workspace/buddy-mlir/build" +LLVM_MLIR_BUILD_DIR="/home/buddy-complier-workspace/buddy-mlir/llvm/build" + +echo "[Info] BUDDY_MLIR_BUILD_DIR = ${BUDDY_MLIR_BUILD_DIR}" +echo "[Info] LLVM_MLIR_BUILD_DIR = ${LLVM_MLIR_BUILD_DIR}" + +RESULT_DIR="${PWD}/test_result/vectorization" +mkdir -p "${RESULT_DIR}" +LOG_FILE="${RESULT_DIR}/vectorization_result.log" +echo "Vectorization Benchmark - $(date)" > "${LOG_FILE}" + +################################################################################ +# 2. Build Benchmark +################################################################################ +mkdir -p build && cd build +echo "[Info] Running CMake configuration..." | tee -a "${LOG_FILE}" +cmake -G Ninja .. \ + -DCMAKE_BUILD_TYPE=RELEASE \ + -DVECTORIZATION_BENCHMARKS=ON \ + -DBUDDY_MLIR_BUILD_DIR="${BUDDY_MLIR_BUILD_DIR}" 2>&1 | tee -a "${LOG_FILE}" + +echo "[Info] Building vectorization-matrix-benchmark..." | tee -a "${LOG_FILE}" +ninja vectorization-matrix-benchmark 2>&1 | tee -a "${LOG_FILE}" + +################################################################################ +# 3. Run Benchmark +################################################################################ +cd bin +echo "[Info] Running vectorization-matrix-benchmark..." | tee -a "${LOG_FILE}" +./vectorization-matrix-benchmark 2>&1 | tee -a "${LOG_FILE}" + +echo "[Info] Benchmark completed. Log saved to ${LOG_FILE}" \ No newline at end of file diff --git a/test_result/deeplearning/build_results_crosscompile_summary.log b/test_result/deeplearning/build_results_crosscompile_summary.log new file mode 100644 index 00000000..df608647 --- /dev/null +++ b/test_result/deeplearning/build_results_crosscompile_summary.log @@ -0,0 +1,23 @@ +[Failed] Build of 'dl-model-tinyllama-benchmark' +[Failed] Build of 'dl-model-mobilenetv3-benchmark' +[Failed] Build of 'dl-model-lenet-benchmark' +[Failed] Build of 'dl-model-bert-benchmark' +[Failed] Build of 'dl-model-whisper-benchmark' +[Failed] Build of 'dl-model-resnet18-benchmark' +[Failed] Build of 'dl-layer-ffn-benchmark' +[Failed] Build of 'dl-layer-selfattention-benchmark' +[Failed] Build of 'dl-layer-rmsnorm-benchmark' +[Failed] Build of 'dl-op-linalg-matmul-benchmark' +[Failed] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' +[Failed] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' +[Failed] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' +[Failed] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' +[Failed] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark' +[Failed] Build of 'dl-op-linalg-batch-matmul-benchmark' +[Failed] Build of 'dl-op-linalg-arithaddf-benchmark' +[Failed] Build of 'dl-op-linalg-arithdivf-benchmark' +[Failed] Build of 'dl-op-linalg-arithmulf-benchmark' +[Failed] Build of 'dl-op-linalg-arithnegf-benchmark' +[Failed] Build of 'dl-op-linalg-arithsubf-benchmark' +[Failed] Build of 'dl-op-linalg-mathfpow-benchmark' +[Failed] Build of 'dl-op-linalg-mathrsqrt-benchmark' diff --git a/test_result/deeplearning/build_results_summary.log b/test_result/deeplearning/build_results_summary.log new file mode 100644 index 00000000..de1252ba --- /dev/null +++ b/test_result/deeplearning/build_results_summary.log @@ -0,0 +1,29 @@ +[Success] Build of 'dl-model-tinyllama-benchmark' +[Success] Build of 'dl-model-mobilenetv3-benchmark' +[Success] Build of 'dl-model-lenet-benchmark' +[Failed] Build of 'dl-model-bert-benchmark' +[Success] Build of 'dl-model-whisper-benchmark' +[Success] Build of 'dl-model-resnet18-benchmark' +[Success] Build of 'dl-layer-ffn-benchmark' +[Success] Build of 'dl-layer-selfattention-benchmark' +[Success] Build of 'dl-layer-rmsnorm-benchmark' +[Success] Build of 'dl-op-linalg-matmul-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' +[Success] Build of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' +[Success] Build of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' +[Success] Build of 'dl-op-linalg-pooling-nhwc-sum-benchmark' +[Success] Build of 'dl-op-linalg-batch-matmul-benchmark' +[Success] Build of 'dl-op-linalg-arithaddf-benchmark' +[Success] Build of 'dl-op-linalg-arithdivf-benchmark' +[Success] Build of 'dl-op-linalg-arithmulf-benchmark' +[Success] Build of 'dl-op-linalg-arithnegf-benchmark' +[Success] Build of 'dl-op-linalg-arithsubf-benchmark' +[Success] Build of 'dl-op-linalg-mathfpow-benchmark' +[Success] Build of 'dl-op-linalg-mathrsqrt-benchmark' +[Success] Build of 'dl-op-linalg-mathexp-benchmark' +[Success] Build of 'dl-op-linalg-reduceaddf-benchmark' +[Success] Build of 'dl-op-linalg-reducemaxf-benchmark' +[Success] Build of 'dl-op-linalg-softmax-exp-sum-div-benchmark' +[Success] Build of 'dl-op-tosa-transpose-benchmark' +[Success] Build of 'dl-op-matmul-transpose-b-benchmark' diff --git a/test_result/deeplearning/dl-layer-ffn-benchmark.log b/test_result/deeplearning/dl-layer-ffn-benchmark.log new file mode 100644 index 00000000..677ea1bb --- /dev/null +++ b/test_result/deeplearning/dl-layer-ffn-benchmark.log @@ -0,0 +1,18 @@ +2025-03-30T12:12:58+00:00 +Running ./dl-layer-ffn-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.39, 6.06 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------- +DL_LAYER_FFN/Scalar 0.065 ms 0.065 ms 10641 +DL_LAYER_FFN/Auto_Vectorization 0.027 ms 0.027 ms 26024 +----------------------------------------------------------- +Correctness Verification: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log new file mode 100644 index 00000000..2ce19761 --- /dev/null +++ b/test_result/deeplearning/dl-layer-rmsnorm-benchmark.log @@ -0,0 +1,18 @@ +2025-03-30T12:13:02+00:00 +Running ./dl-layer-rmsnorm-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.39, 6.06 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +------------------------------------------------------------------------------ +Benchmark Time CPU Iterations +------------------------------------------------------------------------------ +DL_LAYER_RMSNORM/Scalar 0.002 ms 0.002 ms 356344 +DL_LAYER_RMSNORM/Auto_Vectorization 0.001 ms 0.001 ms 764783 +----------------------------------------------------------- +Correctness Verification: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-layer-selfattention-benchmark.log b/test_result/deeplearning/dl-layer-selfattention-benchmark.log new file mode 100644 index 00000000..17623060 --- /dev/null +++ b/test_result/deeplearning/dl-layer-selfattention-benchmark.log @@ -0,0 +1,18 @@ +2025-03-30T12:13:00+00:00 +Running ./dl-layer-selfattention-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.39, 6.06 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------- +DL_LAYER_ATTENTION/Scalar 4.68 ms 4.68 ms 150 +DL_LAYER_ATTENTION/Auto_Vectorization 1.57 ms 1.57 ms 455 +----------------------------------------------------------- +Correctness Verification: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-lenet-benchmark.log b/test_result/deeplearning/dl-model-lenet-benchmark.log new file mode 100644 index 00000000..a1ce7074 --- /dev/null +++ b/test_result/deeplearning/dl-model-lenet-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:09:01+00:00 +Running ./dl-model-lenet-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.04, 1.86, 7.56 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +----------------------------------------------------------------------------- +Benchmark Time CPU Iterations +----------------------------------------------------------------------------- +DL_MODEL_LENET/Auto_Vectorization 0.164 ms 0.164 ms 4368 +DL_MODEL_LENET/Buddy_Vectorization 0.154 ms 0.154 ms 5094 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log new file mode 100644 index 00000000..8bed1b85 --- /dev/null +++ b/test_result/deeplearning/dl-model-mobilenetv3-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:08:59+00:00 +Running ./dl-model-mobilenetv3-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.04, 1.86, 7.56 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +----------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +----------------------------------------------------------------------------------- +BM_MobileNet_V3/BM_MobileNet_V3_scalar 36.7 ms 36.7 ms 18 +BM_MobileNet_V3/BM_MobileNet_V3_conv_opt 32.6 ms 32.6 ms 22 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-resnet18-benchmark.log b/test_result/deeplearning/dl-model-resnet18-benchmark.log new file mode 100644 index 00000000..e95722a5 --- /dev/null +++ b/test_result/deeplearning/dl-model-resnet18-benchmark.log @@ -0,0 +1,18 @@ +2025-03-30T12:12:55+00:00 +Running ./dl-model-resnet18-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.39, 6.08 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------- +DL_MODEL_Resnet18/Auto_Vectorization 723 ms 722 ms 1 +DL_MODEL_Resnet18/Buddy_Vectorization 726 ms 718 ms 1 +----------------------------------------------------------- +Correctness Verification: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-model-tinyllama-benchmark.log b/test_result/deeplearning/dl-model-tinyllama-benchmark.log new file mode 100644 index 00000000..e07df494 --- /dev/null +++ b/test_result/deeplearning/dl-model-tinyllama-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:03:25+00:00 +Running ./dl-model-tinyllama-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 2.75, 3.61, 10.42 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +---------------------------------------------------------------------------- +Benchmark Time CPU Iterations +---------------------------------------------------------------------------- +DL_MODEL_TINYLLAMA/scalar 158531 ms 158516 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt 9744 ms 9735 ms 1 +DL_MODEL_TINYLLAMA/matmul_opt_omp 7716 ms 7038 ms 1 +---------- Verification ---------- +matmul_opt PASS +matmul_opt_omp PASS diff --git a/test_result/deeplearning/dl-model-whisper-benchmark.log b/test_result/deeplearning/dl-model-whisper-benchmark.log new file mode 100644 index 00000000..e4534bbc --- /dev/null +++ b/test_result/deeplearning/dl-model-whisper-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:09:03+00:00 +Running ./dl-model-whisper-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.04, 1.84, 7.52 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +------------------------------------------------------------------------------- +DL_MODEL_Whisper/Auto_Vectorization 78390 ms 78388 ms 1 +DL_MODEL_Whisper/Buddy_Vectorization 36641 ms 36637 ms 1 +----------------------------------------------------------- +Correctness Verification for Output1: PASS +Correctness Verification for Output2: FAIL +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log new file mode 100644 index 00000000..d89cd1e1 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithaddf-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:31+00:00 +Running ./dl-op-linalg-arithaddf-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.35, 5.89 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------- +BM_ADDF_SCALAR 0.030 ms 0.030 ms 23440 +BM_ADDF_AutoVectorization 0.004 ms 0.004 ms 175032 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log new file mode 100644 index 00000000..02d6e568 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithdivf-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:33+00:00 +Running ./dl-op-linalg-arithdivf-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.34, 5.87 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------- +BM_DIVF_SCALAR 0.029 ms 0.029 ms 23951 +BM_DIVF_AutoVectorization 0.009 ms 0.009 ms 73837 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log new file mode 100644 index 00000000..4fa4ffde --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithmulf-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:35+00:00 +Running ./dl-op-linalg-arithmulf-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.34, 5.87 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------- +BM_MULF_SCALAR 0.029 ms 0.029 ms 23549 +BM_MULF_AutoVectorization 0.004 ms 0.004 ms 174752 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log new file mode 100644 index 00000000..e6387a2a --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithnegf-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:37+00:00 +Running ./dl-op-linalg-arithnegf-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.34, 5.87 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------- +BM_NEGF_SCALAR 0.022 ms 0.022 ms 30658 +BM_NEGF_AutoVectorization 0.003 ms 0.003 ms 245490 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log new file mode 100644 index 00000000..3a9efa27 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-arithsubf-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:39+00:00 +Running ./dl-op-linalg-arithsubf-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.34, 5.84 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------- +BM_SUBF_SCALAR 0.029 ms 0.029 ms 23697 +BM_SUBF_AutoVectorization 0.004 ms 0.004 ms 147910 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log new file mode 100644 index 00000000..d187e4d9 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-batch-matmul-benchmark.log @@ -0,0 +1,25 @@ +2025-03-30T12:13:21+00:00 +Running ./dl-op-linalg-batch-matmul-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.36, 5.95 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +--------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +--------------------------------------------------------------------------------------------- +DL_OPS_BATCH_MATMUL/Scalar/iterations:1 3525 ms 3517 ms 1 +DL_OPS_BATCH_MATMUL/AutoVectorization/iterations:1 976 ms 976 ms 1 +DL_OPS_BATCH_MATMUL/Vectorization/iterations:1 189 ms 189 ms 1 +DL_OPS_BATCH_MATMUL/Tile/iterations:1 109 ms 109 ms 1 +DL_OPS_BATCH_MATMUL/SCF/iterations:1 117 ms 117 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST/iterations:1 353 ms 353 ms 1 +DL_OPS_BATCH_MATMUL/BROADCAST_OMP/iterations:1 75.4 ms 38.2 ms 1 +---------- Verification ---------- +Tile PASS +SCF PASS +BROADCAST PASS +BROADCAST_OMP PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log new file mode 100644 index 00000000..4e58a246 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nchw-fchw-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:15+00:00 +Running ./dl-op-linalg-conv2d-nchw-fchw-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.37, 5.97 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +------------------------------------------------------------------- +Benchmark Time CPU Iterations +------------------------------------------------------------------- +BM_Conv2DNchwFchw_SCALAR 283 ms 283 ms 2 +BM_Conv2DNchwFchw_Im2col 10.2 ms 10.2 ms 68 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log new file mode 100644 index 00000000..08115149 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-fhwc-benchmark.log @@ -0,0 +1,21 @@ +2025-03-30T12:13:18+00:00 +Running ./dl-op-linalg-conv2d-nhwc-fhwc-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.36, 5.95 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +--------------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +--------------------------------------------------------------------------------------------------- +DL_OPS_CONV_2D_NHWC_FHWC/scalar/iterations:5 73.5 ms 73.5 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/auto_vectorization/iterations:5 9.35 ms 9.35 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/vectorization/iterations:5 1.74 ms 1.74 ms 5 +DL_OPS_CONV_2D_NHWC_FHWC/vec_tile/iterations:5 1.73 ms 1.73 ms 5 +---------- Verification ---------- +auto_vectorization PASS +vectorization PASS +vec_tile PASS diff --git a/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log new file mode 100644 index 00000000..f09a4101 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-conv2d-nhwc-hwcf-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:17+00:00 +Running ./dl-op-linalg-conv2d-nhwc-hwcf-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.37, 5.97 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +--------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +--------------------------------------------------------------------------------- +BM_CONV_2D_NHWC_HWCF_SCALAR 32.4 ms 32.4 ms 22 +BM_CONV_2D_NHWC_HWCF_AutoVectorization 5.83 ms 5.83 ms 120 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log new file mode 100644 index 00000000..c761a6b6 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:19+00:00 +Running ./dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.36, 5.95 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +------------------------------------------------------------------------------------------------------------ +Benchmark Time CPU Iterations +------------------------------------------------------------------------------------------------------------ +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/scalar/iterations:5 6.25 ms 6.25 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/auto_vectorization/iterations:5 1.71 ms 1.71 ms 5 +DL_OPS_DEPTHWISE_CONV_2D_NHWC_HWC/vectorization/iterations:5 0.128 ms 0.128 ms 5 +---------- Verification ---------- +auto_vectorization PASS +vectorization PASS diff --git a/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log new file mode 100644 index 00000000..c3ecd554 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-mathexp-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:45+00:00 +Running ./dl-op-linalg-mathexp-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.33, 5.81 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +------------------------------------------------------------------- +Benchmark Time CPU Iterations +------------------------------------------------------------------- +BM_EXP_SCALAR 0.046 ms 0.046 ms 15309 +BM_EXP_AutoVectorization 0.032 ms 0.032 ms 21998 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log new file mode 100644 index 00000000..018b3377 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-mathfpow-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:42+00:00 +Running ./dl-op-linalg-mathfpow-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.34, 5.84 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------- +BM_FPOW_SCALAR 0.084 ms 0.084 ms 8347 +BM_FPOW_AutoVectorization 0.057 ms 0.057 ms 12328 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log new file mode 100644 index 00000000..bf045f07 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-mathrsqrt-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:43+00:00 +Running ./dl-op-linalg-mathrsqrt-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.33, 5.81 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +--------------------------------------------------------------------- +Benchmark Time CPU Iterations +--------------------------------------------------------------------- +BM_RSQRT_SCALAR 0.073 ms 0.073 ms 9497 +BM_RSQRT_AutoVectorization 0.004 ms 0.004 ms 161025 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log new file mode 100644 index 00000000..412446c6 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-matmul-benchmark.log @@ -0,0 +1,22 @@ +2025-03-30T12:13:04+00:00 +Running ./dl-op-linalg-matmul-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.38, 6.03 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +------------------------------------------------------------------------------- +DL_OPS_MATMUL/scalar_O0/iterations:1 3716 ms 3716 ms 1 +DL_OPS_MATMUL/scalar_O3/iterations:1 3312 ms 3312 ms 1 +DL_OPS_MATMUL/tile/iterations:1 117 ms 117 ms 1 +DL_OPS_MATMUL/vec/iterations:1 140 ms 140 ms 1 +DL_OPS_MATMUL/vec_omp/iterations:1 20.5 ms 18.8 ms 1 +---------- Verification ---------- +tile PASS +vec PASS +vec_omp PASS diff --git a/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log new file mode 100644 index 00000000..f9296017 --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-pooling-nhwc-sum-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:19+00:00 +Running ./dl-op-linalg-pooling-nhwc-sum-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.36, 5.95 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------- +BM_POOLING_NHWC_SUM_SCALAR 0.233 ms 0.233 ms 3007 +BM_POOLING_NHWC_SUM_AutoVectorization 0.042 ms 0.042 ms 16752 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log new file mode 100644 index 00000000..1e8bcc7f --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-reduceaddf-benchmark.log @@ -0,0 +1,10 @@ +2025-03-30T12:13:47+00:00 +Running ./dl-op-linalg-reduceaddf-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.00, 1.33, 5.81 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. diff --git a/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log new file mode 100644 index 00000000..7ed900ff --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-reducemaxf-benchmark.log @@ -0,0 +1,10 @@ +2025-03-30T12:13:48+00:00 +Running ./dl-op-linalg-reducemaxf-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.08, 1.34, 5.79 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. diff --git a/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log new file mode 100644 index 00000000..37b85c1d --- /dev/null +++ b/test_result/deeplearning/dl-op-linalg-softmax-exp-sum-div-benchmark.log @@ -0,0 +1,19 @@ +2025-03-30T12:13:48+00:00 +Running ./dl-op-linalg-softmax-exp-sum-div-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.08, 1.34, 5.79 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +-------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------- +BM_SOFTMAXEXPSUMDIV_SCALAR 0.006 ms 0.006 ms 124261 +BM_SOFTMAXEXPSUMDIV_AutoVectorization 0.004 ms 0.004 ms 182159 +----------------------------------------------------------- +Correctness Verification: +Transform case: PASS +----------------------------------------------------------- diff --git a/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log new file mode 100644 index 00000000..ac6c4e30 --- /dev/null +++ b/test_result/deeplearning/dl-op-matmul-transpose-b-benchmark.log @@ -0,0 +1,21 @@ +2025-03-30T12:13:50+00:00 +Running ./dl-op-matmul-transpose-b-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.08, 1.34, 5.79 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +----------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +----------------------------------------------------------------------------------------------- +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O0/iterations:5 1046 ms 1044 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3/iterations:5 277 ms 277 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/scalar_O3_omp/iterations:5 32.4 ms 21.2 ms 5 +DL_OPS_MATMUL_TRANSPOSE_B/vec/iterations:5 84.6 ms 84.6 ms 5 +---------- Verification ---------- +scalar_O3 PASS +scalar_O3_omp PASS +vec PASS diff --git a/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log new file mode 100644 index 00000000..aec2390a --- /dev/null +++ b/test_result/deeplearning/dl-op-tosa-transpose-benchmark.log @@ -0,0 +1,17 @@ +2025-03-30T12:13:50+00:00 +Running ./dl-op-tosa-transpose-benchmark +Run on (24 X 5100 MHz CPU s) +CPU Caches: + L1 Data 48 KiB (x12) + L1 Instruction 32 KiB (x12) + L2 Unified 1280 KiB (x12) + L3 Unified 30720 KiB (x1) +Load Average: 1.08, 1.34, 5.79 +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +------------------------------------------------------------------------------------- +DL_OPS_TRANSPOSE_2D/scalar_O0/iterations:5 25.4 ms 20.6 ms 5 +DL_OPS_TRANSPOSE_2D/scalar_O3/iterations:5 19.2 ms 14.2 ms 5 +---------- Verification ---------- +scalar_O3 PASS diff --git a/test_result/deeplearning/run_results_summary.log b/test_result/deeplearning/run_results_summary.log new file mode 100644 index 00000000..ce1a088d --- /dev/null +++ b/test_result/deeplearning/run_results_summary.log @@ -0,0 +1,29 @@ +[Success] Run of 'dl-model-tinyllama-benchmark' +[Success] Run of 'dl-model-mobilenetv3-benchmark' +[Success] Run of 'dl-model-lenet-benchmark' +[Missing] Executable not found for 'dl-model-bert-benchmark' +[Success] Run of 'dl-model-whisper-benchmark' +[Success] Run of 'dl-model-resnet18-benchmark' +[Success] Run of 'dl-layer-ffn-benchmark' +[Success] Run of 'dl-layer-selfattention-benchmark' +[Success] Run of 'dl-layer-rmsnorm-benchmark' +[Success] Run of 'dl-op-linalg-matmul-benchmark' +[Success] Run of 'dl-op-linalg-conv2d-nchw-fchw-benchmark' +[Success] Run of 'dl-op-linalg-conv2d-nhwc-hwcf-benchmark' +[Success] Run of 'dl-op-linalg-conv2d-nhwc-fhwc-benchmark' +[Success] Run of 'dl-op-linalg-depthwise-conv-2d-nhwc-hwc-benchmark' +[Success] Run of 'dl-op-linalg-pooling-nhwc-sum-benchmark' +[Success] Run of 'dl-op-linalg-batch-matmul-benchmark' +[Success] Run of 'dl-op-linalg-arithaddf-benchmark' +[Success] Run of 'dl-op-linalg-arithdivf-benchmark' +[Success] Run of 'dl-op-linalg-arithmulf-benchmark' +[Success] Run of 'dl-op-linalg-arithnegf-benchmark' +[Success] Run of 'dl-op-linalg-arithsubf-benchmark' +[Success] Run of 'dl-op-linalg-mathfpow-benchmark' +[Success] Run of 'dl-op-linalg-mathrsqrt-benchmark' +[Success] Run of 'dl-op-linalg-mathexp-benchmark' +[Failed] Run of 'dl-op-linalg-reduceaddf-benchmark' +[Failed] Run of 'dl-op-linalg-reducemaxf-benchmark' +[Success] Run of 'dl-op-linalg-softmax-exp-sum-div-benchmark' +[Success] Run of 'dl-op-tosa-transpose-benchmark' +[Success] Run of 'dl-op-matmul-transpose-b-benchmark' diff --git a/test_result/geminiprocessing/build.log b/test_result/geminiprocessing/build.log new file mode 100644 index 00000000..8473f261 --- /dev/null +++ b/test_result/geminiprocessing/build.log @@ -0,0 +1,655 @@ +[1/21] Creating directories for 'project_googlebenchmark' +[2/21] Building C object benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o +FAILED: benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o +riscv64-unknown-linux-gnu-gcc -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../frontend/Interfaces -I/home/buddy-complier-workspace/buddy-mlir/build/cmake/../../thirdparty/include -I/home/buddy-complier-workspace/buddy-benchmark/benchmarks -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include -I/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/.. -I/home/xychen/buddy-mlir/frontend/Interfaces -O3 -DNDEBUG -MD -MT benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -MF benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o.d -o benchmarks/Gemmini/Ops/MatMulOp/CMakeFiles/ExoMatMul.dir/ExoMatmul.c.o -c /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c: In function '_exo_matmul_4': +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:47: error: macro "gemmini_extended_config_ex" requires 7 arguments, but only 6 given + 28 | gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0); + | ^ +In file included from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23: +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:251: note: macro "gemmini_extended_config_ex" defined here + 251 | #define gemmini_extended_config_ex(dataflow, sys_act, sys_shift, relu6_shift, A_stride, A_transpose, B_transpose) \ + | +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:3: error: 'gemmini_extended_config_ex' undeclared (first use in this function) + 28 | gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:28:3: note: each undeclared identifier is reported only once for each function it appears in +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:35:18: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] + 35 | int32_t *res = (int32_t*) ((uint32_t)gemm_acc_malloc (16 * 16 * 4 * 4 * sizeof(int32_t))); + | ^ +In file included from /home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:20, + from /home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:23: +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:9: note: in expansion of macro 'gemmini_extended_preload' + 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:119: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:66:9: note: in expansion of macro 'gemmini_extended_preload' + 66 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:67:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 67 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:67:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 67 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:9: note: in expansion of macro 'gemmini_extended_preload' + 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:125: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:68:9: note: in expansion of macro 'gemmini_extended_preload' + 68 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:69:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 69 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:69:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 69 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:9: note: in expansion of macro 'gemmini_extended_preload' + 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:133: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:70:9: note: in expansion of macro 'gemmini_extended_preload' + 70 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:71:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 71 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:71:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 71 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:9: note: in expansion of macro 'gemmini_extended_preload' + 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:133: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:72:9: note: in expansion of macro 'gemmini_extended_preload' + 72 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:73:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 73 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:73:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 73 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:9: note: in expansion of macro 'gemmini_extended_preload' + 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:126: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:74:9: note: in expansion of macro 'gemmini_extended_preload' + 74 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:75:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 75 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:75:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 75 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:9: note: in expansion of macro 'gemmini_extended_preload' + 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:132: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:76:9: note: in expansion of macro 'gemmini_extended_preload' + 76 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:77:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 77 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:77:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 77 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:9: note: in expansion of macro 'gemmini_extended_preload' + 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:78:9: note: in expansion of macro 'gemmini_extended_preload' + 78 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:79:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 79 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:79:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 79 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:9: note: in expansion of macro 'gemmini_extended_preload' + 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:80:9: note: in expansion of macro 'gemmini_extended_preload' + 80 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + 1024 + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:81:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 81 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:81:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 81 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + 256)/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:9: note: in expansion of macro 'gemmini_extended_preload' + 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:134: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:82:9: note: in expansion of macro 'gemmini_extended_preload' + 82 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:83:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 83 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:83:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 83 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:9: note: in expansion of macro 'gemmini_extended_preload' + 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:84:9: note: in expansion of macro 'gemmini_extended_preload' + 84 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:85:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 85 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:85:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 85 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:9: note: in expansion of macro 'gemmini_extended_preload' + 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:86:9: note: in expansion of macro 'gemmini_extended_preload' + 86 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:87:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 87 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:87:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 87 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:9: note: in expansion of macro 'gemmini_extended_preload' + 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:88:9: note: in expansion of macro 'gemmini_extended_preload' + 88 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (2) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:89:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 89 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:89:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 89 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (2) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:9: note: in expansion of macro 'gemmini_extended_preload' + 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:134: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:90:9: note: in expansion of macro 'gemmini_extended_preload' + 90 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:91:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 91 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:91:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 91 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:9: note: in expansion of macro 'gemmini_extended_preload' + 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:140: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:92:9: note: in expansion of macro 'gemmini_extended_preload' + 92 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:93:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 93 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:93:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 93 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:9: note: in expansion of macro 'gemmini_extended_preload' + 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:94:9: note: in expansion of macro 'gemmini_extended_preload' + 94 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:95:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 95 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:95:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 95 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:34: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:9: note: in expansion of macro 'gemmini_extended_preload' + 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:148: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:232:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 232 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), ((uint64_t)(C_rows) << (ADDR_LEN + 16)) | ((uint64_t)(C_cols) << ADDR_LEN) | (uint64_t)(C), k_PRELOAD) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:96:9: note: in expansion of macro 'gemmini_extended_preload' + 96 | gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)b)) + ((j) * (4096) + (3) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16))) | 0x40000000, (16), (16), (16), (16)); + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:97:44: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 97 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:15: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:219:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 219 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, ((uint64_t)(A_rows) << (ADDR_LEN + 16)) | ((uint64_t)(A_cols) << ADDR_LEN) | (uint64_t)(A), ((uint64_t)(BD_rows) << (ADDR_LEN + 16)) | ((uint64_t)(BD_cols) << ADDR_LEN) | (uint64_t)(BD), k_COMPUTE_PRELOADED) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:97:9: note: in expansion of macro 'gemmini_extended_compute_preloaded' + 97 | gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)a)) + ((i) * (1024) + (3) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:98:89: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 98 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16)), (16), (16) ); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:98:9: note: in expansion of macro 'gemmini_extended_mvout' + 98 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024))/16)), (16), (16) ); + | ^~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:99:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 99 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 16 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16)), (16), (16) ); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:99:9: note: in expansion of macro 'gemmini_extended_mvout' + 99 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 16 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + 256)/16)), (16), (16) ); + | ^~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:100:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 100 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 32 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16)), (16), (16) ); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:100:9: note: in expansion of macro 'gemmini_extended_mvout' + 100 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 32 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (2) * (256))/16)), (16), (16) ); + | ^~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:101:94: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 101 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 48 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16)), (16), (16) ); + | ^ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/../rocc-software/src/xcustom.h:152:25: note: in definition of macro 'ROCC_INSTRUCTION_0_R_R' + 152 | : "r"(rs1), "r"(rs2)); \ + | ^~~ +/home/buddy-complier-workspace/chipyard/generators/gemmini/software/gemmini-rocc-tests/include/gemmini.h:212:3: note: in expansion of macro 'ROCC_INSTRUCTION_RS1_RS2' + 212 | ROCC_INSTRUCTION_RS1_RS2(XCUSTOM_ACC, dram_addr, ((uint64_t)(rows) << (ADDR_LEN + 16)) | ((uint64_t)(cols) << ADDR_LEN) | (uint64_t)(spad_addr), k_MVOUT) + | ^~~~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:101:9: note: in expansion of macro 'gemmini_extended_mvout' + 101 | gemmini_extended_mvout( ((uint64_t) &C[(16 * i + 3136 * io) * (256) + 48 + 64 * j]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((j) * (1024) + (3) * (256))/16)), (16), (16) ); + | ^~~~~~~~~~~~~~~~~~~~~~ +/home/buddy-complier-workspace/buddy-benchmark/benchmarks/Gemmini/Ops/MatMulOp/ExoMatmul.c:105:17: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] + 105 | gemm_acc_free((uint32_t)(res)); + | ^ +[3/21] Generating buddy_matmul.o +[4/21] Building CXX object benchmarks/Gemmini/ResNet-101/CMakeFiles/CRunnerUtils.dir/CRunnerUtils.cpp.o +[5/21] Performing download step (git clone) for 'project_googlebenchmark' +Cloning into 'project_googlebenchmark'... +HEAD is now at f91b6b4 bump version to 1.6 in preparation for release +[6/21] Generating resnet-101.o +ninja: build stopped: subcommand failed. diff --git a/test_result/geminiprocessing/cmake_configure.log b/test_result/geminiprocessing/cmake_configure.log new file mode 100644 index 00000000..a3a42f37 --- /dev/null +++ b/test_result/geminiprocessing/cmake_configure.log @@ -0,0 +1,37 @@ +-- The CXX compiler identification is GNU 9.2.0 +-- The C compiler identification is GNU 9.2.0 +-- Detecting CXX compiler ABI info +-- Detecting CXX compiler ABI info - done +-- Check for working CXX compiler: /home/buddy-complier-workspace/chipyard/.conda-env/esp-tools/bin/riscv64-unknown-linux-gnu-g++ - skipped +-- Detecting CXX compile features +-- Detecting CXX compile features - done +-- Detecting C compiler ABI info +-- Detecting C compiler ABI info - done +-- Check for working C compiler: /home/buddy-complier-workspace/chipyard/.conda-env/esp-tools/bin/riscv64-unknown-linux-gnu-gcc - skipped +-- Detecting C compile features +-- Detecting C compile features - done +-- Configuring Target Architecture: avx512f +-- Configuring Target Triple: x86_64-unknown-linux-gnu +-- Configuring benchmarks: google +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD +-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed +-- Looking for pthread_create in pthreads +-- Looking for pthread_create in pthreads - not found +-- Looking for pthread_create in pthread +-- Looking for pthread_create in pthread - found +-- Found Threads: TRUE +-- Performing Test HAVE_SSE +-- Performing Test HAVE_SSE - Failed +-- SSE support - no +-- Performing Test HAVE_AVX2 +-- Performing Test HAVE_AVX2 - Failed +-- AVX2 support - no +-- Performing Test HAVE_AVX512 +-- Performing Test HAVE_AVX512 - Failed +-- AVX512 support - no +-- Performing Test HAVE_NEON +-- Performing Test HAVE_NEON - Failed +-- Arm Neon support - no +-- Configuring done +-- Generating done +-- Build files have been written to: /home/buddy-complier-workspace/buddy-benchmark/build