buddy-compiler · shirohasuki · Sep 8, 2024 · Sep 8, 2024 · Sep 8, 2024 · Oct 31, 2024
diff --git a/BuddyGemmini/.gitignore b/BuddyGemmini/.gitignore
@@ -0,0 +1,7 @@
+*.data
+__pycache__
+*.pyc
+/build
+forward.mlir
+subgraph0.mlir
+*.o
diff --git a/BuddyGemmini/CMakeLists.txt b/BuddyGemmini/CMakeLists.txt
@@ -0,0 +1,111 @@
+# Build configuration for the BuddyGemmini LeNet example.
+#
+# Configure with:
+#   cmake .. -DBUDDY_MLIR_DIR=/path/to/buddy-mlir/
+cmake_minimum_required(VERSION 3.10)
+
+# Every tool and header path below is derived from BUDDY_MLIR_DIR, so fail
+# early with a clear message instead of producing paths rooted at "/".
+if(NOT BUDDY_MLIR_DIR)
+  message(FATAL_ERROR
+    "BUDDY_MLIR_DIR is not set. Configure with -DBUDDY_MLIR_DIR=/path/to/buddy-mlir/")
+endif()
+
+set(BUDDY_EXAMPLES_DIR ${BUDDY_MLIR_DIR}/examples/)
+set(BUDDY_BINARY_DIR ${BUDDY_MLIR_DIR}/build/bin/)
+set(RISCV_GNU_TOOLCHAIN ${BUDDY_MLIR_DIR}/build/thirdparty/riscv-gnu-toolchain)
+# The cross compiler has to be selected before project() enables the CXX
+# language; otherwise the host compiler is probed first.
+set(CMAKE_CXX_COMPILER  ${RISCV_GNU_TOOLCHAIN}/bin/riscv64-unknown-linux-gnu-g++)
+
+project(BuddyGemmini LANGUAGES CXX)
+
+# The example sources (importer script, main program, headers) live next to
+# this CMakeLists.txt, wherever the directory has been checked out.
+set(BUDDY_GEMMINI_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+set(INTERFACE_DIR ${BUDDY_MLIR_DIR}/frontend/Interfaces/)
+set(INCLUDE_DIR ${BUDDY_GEMMINI_DIR}/include/)
+
+
+# Run the Python importer to generate the MLIR modules and the weight file.
+#
+# buddy-lenet-import.py writes forward.mlir, subgraph0.mlir and arg0.data next
+# to itself, i.e. into the example's source directory, whereas the relative
+# OUTPUT names below refer to the build directory. The generated files are
+# therefore copied into the build directory so that the declared outputs
+# really exist; without that, this rule (and everything depending on it)
+# would be considered out of date and re-run on every build.
+add_custom_command(
+  OUTPUT  forward.mlir subgraph0.mlir arg0.data
+  # `cmake -E env` sets the variable the script reads without relying on
+  # shell-specific `export ... &&` syntax.
+  COMMAND ${CMAKE_COMMAND} -E env BUDDYGEMMINI_EXAMPLE_PATH=${BUDDY_GEMMINI_DIR}
+          python3 ${BUDDY_GEMMINI_DIR}/buddy-lenet-import.py
+  COMMAND ${CMAKE_COMMAND} -E copy
+          ${BUDDY_GEMMINI_DIR}/forward.mlir
+          ${BUDDY_GEMMINI_DIR}/subgraph0.mlir
+          ${BUDDY_GEMMINI_DIR}/arg0.data
+          ${CMAKE_CURRENT_BINARY_DIR}
+  DEPENDS buddy-lenet-import.py
+  COMMENT "Generating forward.mlir, subgraph0.mlir and parameter files"
+  VERBATIM
+)
+
+# Compile forward.mlir into the RISC-V object file forward.o.
+#
+# The command is a single shell pipeline:
+#   1. buddy-opt converts the TOSA ops to linalg/tensor/arith,
+#   2. a second buddy-opt invocation runs the listed lowering passes down to
+#      the LLVM dialect (requesting C wrappers for the functions),
+#   3. buddy-translate emits LLVM IR,
+#   4. buddy-llc generates an unoptimized (-O0) riscv64 object file with the
+#      buddyext and D extensions enabled and the hard-float ABI.
+# The input is read from the example source directory; the object file is
+# written relative to the directory the command runs in.
+add_custom_command(
+  OUTPUT  forward.o
+  COMMAND ${BUDDY_BINARY_DIR}/buddy-opt ${BUDDY_GEMMINI_DIR}/forward.mlir 
+            -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith))" |
+            ${BUDDY_BINARY_DIR}/buddy-opt
+            -buffer-deallocation-simplification
+            -convert-linalg-to-loops
+            -eliminate-empty-tensors
+            -llvm-request-c-wrappers
+            -convert-math-to-llvm
+            -convert-math-to-libm
+            -convert-scf-to-cf
+            -convert-arith-to-llvm
+            -expand-strided-metadata
+            -finalize-memref-to-llvm
+            -convert-func-to-llvm
+            -reconcile-unrealized-casts |
+          ${BUDDY_BINARY_DIR}/buddy-translate --buddy-to-llvmir |
+          ${BUDDY_BINARY_DIR}/buddy-llc -filetype=obj -mtriple=riscv64 -O0 -mattr=+buddyext,+D -float-abi=hard -o forward.o
+          DEPENDS forward.mlir
+  COMMENT "Building forward.o"
+  VERBATIM)
+
+# Compile subgraph0.mlir into the RISC-V object file subgraph0.o.
+#
+# The rule runs four steps and keeps every intermediate result in the build
+# directory so the lowering can be inspected:
+#   1. TOSA -> linalg/tensor/arith                 => subgraph0_linalg.mlir
+#   2. bufferization + -convert-linalg-to-gemmini  => subgraph0_loops.mlir
+#      (inspection only, not consumed by later steps)
+#   3. full lowering to the LLVM dialect, then
+#      buddy-translate to LLVM IR                  => subgraph0.ll
+#   4. buddy-llc code generation                   => subgraph0.o
+# Steps 2-4 read the file produced by an earlier step instead of re-running
+# the identical upstream pipeline, so each lowering is executed only once.
+# The intermediate files are declared as BYPRODUCTS so that the build system
+# knows about them (e.g. for `clean`).
+add_custom_command(
+  OUTPUT  subgraph0.o
+  BYPRODUCTS subgraph0_linalg.mlir subgraph0_loops.mlir subgraph0.ll
+  COMMAND ${BUDDY_BINARY_DIR}/buddy-opt ${BUDDY_GEMMINI_DIR}/subgraph0.mlir
+            -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named, tosa-to-linalg, tosa-to-tensor, tosa-to-arith))"
+            > subgraph0_linalg.mlir
+  COMMAND ${BUDDY_BINARY_DIR}/buddy-opt subgraph0_linalg.mlir
+            -eliminate-empty-tensors
+            -convert-tensor-to-linalg
+            -linalg-bufferize
+            -batchmatmul-optimize
+            -convert-linalg-to-gemmini
+            > subgraph0_loops.mlir
+  COMMAND ${BUDDY_BINARY_DIR}/buddy-opt subgraph0_linalg.mlir
+            -eliminate-empty-tensors
+            -linalg-bufferize
+            -tensor-bufferize
+            -func-bufferize
+            -convert-linalg-to-gemmini
+            -expand-strided-metadata
+            -convert-linalg-to-loops
+            -convert-scf-to-cf
+            -llvm-request-c-wrappers
+            -lower-gemmini
+            -arith-bufferize
+            -buffer-deallocation
+            -finalizing-bufferize
+            -convert-arith-to-llvm
+            -convert-func-to-llvm
+            -finalize-memref-to-llvm
+            -reconcile-unrealized-casts |
+          ${BUDDY_BINARY_DIR}/buddy-translate --buddy-to-llvmir
+            > subgraph0.ll
+  COMMAND ${BUDDY_BINARY_DIR}/buddy-llc subgraph0.ll -filetype=obj -mtriple=riscv64 -O0 -mattr=+buddyext,+D -float-abi=hard -o subgraph0.o
+  DEPENDS subgraph0.mlir
+  COMMENT "Building subgraph0.o"
+  VERBATIM)
+
+
+# Bundle the two generated object files into a static library. The library
+# has no compiled sources of its own, so the linker language must be given
+# explicitly.
+add_library(GemminiLENET STATIC subgraph0.o forward.o)
+set_target_properties(GemminiLENET PROPERTIES LINKER_LANGUAGE CXX)
+
+# The runner executable. Linking against GemminiLENET already makes the
+# library (and therefore the generated objects) build first, so no separate
+# add_dependencies() call is needed. `-static` is passed through to the link
+# line to produce a statically linked binary.
+add_executable(buddy-gemmini-lenet-run buddy-lenet-main.cpp)
+target_include_directories(buddy-gemmini-lenet-run PRIVATE ${INTERFACE_DIR} ${INCLUDE_DIR})
+target_link_libraries(buddy-gemmini-lenet-run PRIVATE -static GemminiLENET)
diff --git a/BuddyGemmini/README.md b/BuddyGemmini/README.md
@@ -0,0 +1,85 @@
+# BuddyGemmini LeNet E2E deployment on FireSim
+
+## Overview
+This guide provides an example of end-to-end deployment of a DNN (LeNet) inference to a DSA backend (Gemmini) using the Buddy Compiler. 
+
+We use FireSim, a platform for FPGA-accelerated cycle-accurate simulation, to run end-to-end DNN workloads that would take too long to run on Verilator/VCS. FireSim also allows users to check that their Gemmini hardware/software will work when running in a Linux environment. The FireSim used in this guide is installed locally on a Xilinx VCU118.
+
+## Preparation
+Before proceeding any further, make sure that you have installed the dependencies below:
+1. Installation of [Buddy-mlir basic environment and cross-compilation toolchain](https://github.com/buddy-compiler/buddy-mlir/blob/main/docs/RVVEnvironment.md)
+
+2. Environment installation for [Chipyard](https://chipyard.readthedocs.io/en/1.11.0/) and [Firesim](https://docs.fires.im/en/1.18.0/). The environment for this guide is based on a local acceleration card, the VCU118, with configuration versions Chipyard 1.11.0 and FireSim 1.18.0. We recommend installing these versions (install firesim as a submodule of chipyard) and completing all the content in the FireSim documentation's [Getting Started Guide](https://docs.fires.im/en/1.18.0/Getting-Started-Guides/On-Premises-FPGA-Getting-Started/Repo-Setup/Xilinx-Alveo-U280.html).
+
+3. Complete the build of [gemmini](https://github.com/ucb-bar/gemmini), and build a complete bitstream file based on the default Gemmini configuration using the `firesim buildbitstream` command.
+
+## Cross-compilation
+1. Activate your python environment.
+
+2. Build buddy-gemmini-lenet-run
+
+```
+$ mkdir build && cd build
+$ cmake .. -DBUDDY_MLIR_DIR=/path/to/buddy-mlir/ # replace with your buddy-mlir directory path
+$ make buddy-gemmini-lenet-run
+```
+
+## Deployment to FireSim
+1. Copy the executable file (located in `BuddyGemmini/build/`) and the required data files to Gemmini's software path:
+```
+$ cd chipyard # go to your chipyard root directory 
+$ mkdir ./generators/gemmini/software/overlay/root/BuddyGemmini/
+$ cp ${BUDDYGEMMINI_EXAMPLE_PATH}/build/buddy-gemmini-lenet-run  ./generators/gemmini/software/overlay/root/BuddyGemmini/
+$ cp ${BUDDYGEMMINI_EXAMPLE_PATH}/arg0.data  ./generators/gemmini/software/overlay/root/BuddyGemmini/
+$ cp -r ${BUDDYGEMMINI_EXAMPLE_PATH}/images/  ./generators/gemmini/software/overlay/root/BuddyGemmini/
+```
+2. Build software for the target platform
+```
+$ cd chipyard
+$ ./sims/firesim/sw/firesim-software/marshal -v build ./generators/gemmini/software/gemmini-tests-interactive.json && ./sims/firesim/sw/firesim-software/marshal -v install ./generators/gemmini/software/gemmini-tests-interactive.json
+```
+
+3. Activate your FireSim environment.
+```
+$ cd chipyard/sims/firesim
+$ source ./sourceme-manager.sh --skip-ssh-setup
+```
+
+4. In the `firesim/deploy/` path, there are four files that configure key information for FireSim's build workload, bitstream, runtime, etc. Please check the following configurations:
+
+- `config_build_recipes.yaml`: Configures the Gemmini configuration, such as `firesim_custom_gemmini_rocket_singlecore_no_nic`
+- `config_build.yaml`: Under `builds_to_run`, select the Gemmini configuration, such as `firesim_custom_gemmini_rocket_singlecore_no_nic`
+- `config_hwdb.yaml`: For `bitstream_tar`, configure the absolute path where your generated Gemmini bitstream is stored
+- `config_runtime.yaml`: This file is for building the runtime. Please modify the `workload_name` to `gemmini-tests-interactive.json`. We will execute this interactive configuration after starting the simulation later.
+
+
+5. Build and deploy simulation infrastructure to the Run Farm Machines. Each time you change your workload content, please re-execute `step 2` to `step 5`.
+
+```
+$ firesim infrasetup
+```
+
+6. Start simulation on Run Farm Machines. After executing the command below, the terminal will display a background monitor of the simulation running.
+
+```
+$ firesim runworkload
+```
+
+7. SSH connect to `BUILD_FARM_IP`, open a new terminal connection to the screen created by Run Farm Machines (please refer to the FireSim documentation to confirm you can correctly connect to Run Farm Machines).
+
+```
+$ ssh BUILD_FARM_IP
+$ screen -r fsim0
+```
+
+## Final step!
+Now you can log in to the system! The username is `root` and there is no password. The steps described here are for manual execution. You can also use `gemmini-tests.json` and `overlay/root/run-tests.sh` as references for writing your own automated execution script; this turns the manual operations performed after `firesim runworkload` into automatic execution. The corresponding log files will be recorded in the `/firesim/deploy/results-workload` folder.
+
+```
+$ cd ./BuddyGemmini
+$ export BUDDYGEMMINI_EXAMPLE_PATH=$PWD
+$ ./buddy-gemmini-lenet-run
+```
+
+If all steps go well, you will see the output below. Good luck.
+![demo](./doc/demo.png)
diff --git a/BuddyGemmini/buddy-lenet-import.py b/BuddyGemmini/buddy-lenet-import.py
@@ -0,0 +1,76 @@
+# ===- buddy-lenet-import.py ---------------------------------------------------
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ===---------------------------------------------------------------------------
+#
+# This is the LeNet model AOT importer.
+#
+# ===---------------------------------------------------------------------------
+
+import os
+from pathlib import Path
+
+import numpy as np
+import torch
+from torch._inductor.decomposition import decompositions as inductor_decomp
+
+from buddy.compiler.frontend import DynamoCompiler
+from buddy.compiler.graph import GraphDriver
+from buddy.compiler.graph.transform import simply_fuse
+from buddy.compiler.ops import tosa
+from model import LeNet
+
+# Retrieve the LeNet model path from environment variables.
+model_path = os.environ.get("BUDDYGEMMINI_EXAMPLE_PATH")
+if model_path is None:
+    raise EnvironmentError(
+        "The environment variable 'LENET_MODEL_PATH' is not set or is invalid."
+    )
+
+model = LeNet()
+model = torch.load(model_path + "/lenet-model.pth")
+model = model.eval()
+
+# Initialize Dynamo Compiler with specific configurations as an importer.
+dynamo_compiler = DynamoCompiler(
+    primary_registry=tosa.ops_registry,
+    aot_autograd_decomposition=inductor_decomp,
+)
+
+data = torch.randn([1, 1, 28, 28])
+# Import the model into MLIR module and parameters.
+with torch.no_grad():
+    graphs = dynamo_compiler.importer(model, data)
+
+assert len(graphs) == 1
+graph = graphs[0]
+params = dynamo_compiler.imported_params[graph]
+pattern_list = [simply_fuse]
+graphs[0].fuse_ops(pattern_list)
+driver = GraphDriver(graphs[0])
+driver.subgraphs[0].lower_to_top_level_ir()
+path_prefix = os.path.dirname(os.path.abspath(__file__))
+with open(os.path.join(path_prefix, "subgraph0.mlir"), "w") as module_file:
+    print(driver.subgraphs[0]._imported_module, file=module_file)
+with open(os.path.join(path_prefix, "forward.mlir"), "w") as module_file:
+    print(driver.construct_main_graph(True), file=module_file)
+
+params = dynamo_compiler.imported_params[graph]
+current_path = os.path.dirname(os.path.abspath(__file__))
+
+float32_param = np.concatenate(
+    [param.detach().numpy().reshape([-1]) for param in params]
+)
+
+float32_param.tofile(Path(current_path) / "arg0.data")