diff --git a/fpga/Makefile b/fpga/Makefile index 83ad6069..31b0c4dd 100644 --- a/fpga/Makefile +++ b/fpga/Makefile @@ -15,12 +15,14 @@ else @echo "Bitstream generation for zcu102 board finished. The bitstream Configuration Memory File was copied to ./pulp_zcu102.bit and ./pulp_zcu102.bin" endif +flash_zcu102: ## flashes the ZCU102 bitstream to the board. + $(MAKE) -C pulp flash BOARD=zcu102 + clean_zcu102: ## Removes all bitstreams, *.log files and vivado related files (rm -rf vivado*) for the zcu102 board. $(MAKE) -C pulp clean BOARD=zcu102 rm -f pulp_zcu102.bit rm -f pulp_zcu102.bin - vcu118: ## Generates the bitstream for the vcu118 board ifdef gui $(MAKE) -C pulp gui BOARD=vcu118 @@ -31,12 +33,14 @@ else @echo "Bitstream generation for vcu118 board finished. The bitstream Configuration Memory File was copied to ./pulp_vcu118.bit and ./pulp_vcu118.bin" endif +flash_vcu118: ## flashes the VCU118 bitstream to the board. + $(MAKE) -C pulp flash BOARD=vcu118 + clean_vcu118: ## Removes all bitstreams, *.log files and vivado related files (rm -rf vivado*) for the vcu118 board. $(MAKE) -C pulp clean BOARD=vcu118 rm -f pulp_vcu118.bit rm -f pulp_vcu118.bin - clean_ips: ## Cleans the generated IPs $(MAKE) -C pulp clean-ips BOARD=zcu102 diff --git a/fpga/README.md b/fpga/README.md index 3db0fc5b..45354ff7 100644 --- a/fpga/README.md +++ b/fpga/README.md @@ -6,7 +6,18 @@ subdirectory for the various supported boards: * Xilinx ZCU102 * Xilinx VCU118 (untested) -### Bitstream Generation +## Bitstream Generation and Flashing + +### Vivado Versions +Tested with Vivado 2022.1. If the board part is not found (e.g., `xilinx.com:zcu102:part0:3.3`), you may need to change the version (for older versions of Vivado, for example from `3.3` to `3.2`). + +The scripts assume that the `vivado` command points to the vivado binary. On IIS systems using SEPP packages, run commands as +``` +vitis-2022.1 make +``` +using the appropriate `vitis-` or `vivado-` command. 
+ +### Generating the Bitstream In order to generate the PULP bitstream for a supported target FPGA board, first fetch the required dependencies and generate the corresponding scripts with bender in the project root directory. To do this, run the followin commands in the project root directory: @@ -36,12 +47,70 @@ Note: if using a different command to launch vivado, you can set the `$(VIVADO)` ### Bitstream Flashing -TODO: describe bitstream flashing +To flash the bitstream, use the `make flash_zcu102` (tested) and `make flash_vcu118` (untested) commands after running the corresponding bitstream generation targets. Of course, make sure the board is correctly: +* powered on, and +* connected via USB + +## Compiling and Running Programs on FPGA + +The following instructions are based on commit `ddebe93` of the PULP SDK. This is an old version. The changes to make it work with FPGA were: +* The `pos_init_fll` function now returns the `ARCHI_FPGA_FREQUENCY` instead of trying to configure the FLL, and all FLLs are registered as running with that frequency. +* `ARCHI_FPGA_FREQUENCY` was changed to 10 MHz, matching the configuration of the FPGA port. 
+ +To run your program on the FPGA platform, you need the following ingredients: +* A USB-JTAG adapter connected correctly to the FPGA board +* A running OpenOCD instance connected to the emulated PULP system via that adapter +* GNU `gdb` to manage the execution + +### Compilation + +Using a suitable version of the PULP SDK, run the following command in your software project (`io=uart` is not strictly necessary in the referenced commit but it doesn't hurt to have it there): +``` +make build platform=fpga io=uart -j4 +``` +### Connecting to the FPGA Board +To connect an ARM 20-pin JTAG header to the PULP system's JTAG interface on ZCU102/VCU118, connect the following pins: + + +| Signal | ARM Pin | ZCU102/VCU118 PMOD0 Pin | +|--------|---------|-------------------------| +| TMS | 7 | 0 | +| TDI | 5 | 1 | +| TDO | 13 | 2 | +| TCK | 9 | 3 | +| TRSTN | 3 | 4 (not used) | + + +Now you can connect to the board with OpenOCD (assuming you are using ZCU102 and have an Olimex JTAG interface): +``` +openocd -f pulp-zcu102/openocd-zcu102-olimex-arm-usb-ocd-h.cfg +``` -### Compiling and running on fpga +### Running Your Program -TODO: describe compiling and running on fpga +To flash your program with JTAG, you can now use `gdb` - in the `fpga` folder, an example GDB script that flashes the program is provided as `do.gdb`. You can copy it to your C project for convenience. Then, from your C project directory and with the SDK correctly configured, run: +``` +alias rvgdb=${PULP_RISCV_GCC_TOOLCHAIN}/bin/riscv32-unknown-elf-gdb +rvgdb -x do.gdb ${PATH_TO_BINARY} +``` +where `$PATH_TO_BINARY` is the location of your compiled RISCV binary, usually something like `BUILD/PULP/GCC_RISCV/test/test` if your application name is `test`. +For UART input/output, PULP's UART RX/TX pins are mapped to connect to the onboard USB-UART bridge, so connecting a USB cable to the appropriate USB port is sufficient. On ZCU102, it is connected to UART port 2 of the CP2108 USB-UART chip. 
To find out which `/dev/ttyUSB*` this corresponds to, run +``` +dmesg | grep tty +``` +to get an output like: +``` +[...] +[15626159.946345] usb 1-2: cp210x converter now attached to ttyUSB0 +[15626159.947768] usb 1-2: cp210x converter now attached to ttyUSB2 +[15626159.949614] usb 1-2: cp210x converter now attached to ttyUSB3 +[15626159.951680] usb 1-2: cp210x converter now attached to ttyUSB4 +[...] +``` +In this case, UART0 is `ttyUSB0`, UART1 is `ttyUSB2` and UART2 is `ttyUSB3`. Note that the numbering needn't be linear, depending on what other devices you have attached to your computer. +To connect to PULP with UART using `screen`, in the above case we would thus run: +`screen /dev/ttyUSB3 115200,cs8` diff --git a/fpga/do.gdb b/fpga/do.gdb new file mode 100644 index 00000000..cbd70849 --- /dev/null +++ b/fpga/do.gdb @@ -0,0 +1,4 @@ +target remote :3333 +tui enable +layout split +load diff --git a/fpga/pulp-vcu118/fpga-settings.mk b/fpga/pulp-vcu118/fpga-settings.mk index 1cbaed15..864f54da 100644 --- a/fpga/pulp-vcu118/fpga-settings.mk +++ b/fpga/pulp-vcu118/fpga-settings.mk @@ -1,6 +1,7 @@ export BOARD=vcu118 export XILINX_PART=xcvu9p-flga2104-2L-e export XILINX_BOARD=xilinx.com:vcu118:part0:2.0 +export XILINX_FPGA_DEV=xcvu9_0 export FC_CLK_PERIOD_NS=100 export CL_CLK_PERIOD_NS=100 export PER_CLK_PERIOD_NS=100 diff --git a/fpga/pulp-zcu102/fpga-settings.mk b/fpga/pulp-zcu102/fpga-settings.mk index c6c51532..e3dcd9b6 100644 --- a/fpga/pulp-zcu102/fpga-settings.mk +++ b/fpga/pulp-zcu102/fpga-settings.mk @@ -1,6 +1,7 @@ export BOARD=zcu102 export XILINX_PART=xczu9eg-ffvb1156-2-e -export XILINX_BOARD=xilinx.com:zcu102:part0:3.2 +export XILINX_BOARD=xilinx.com:zcu102:part0:3.3 +export XILINX_FPGA_DEV=xczu9_0 export FC_CLK_PERIOD_NS=100 export CL_CLK_PERIOD_NS=100 export PER_CLK_PERIOD_NS=100 diff --git a/fpga/pulp/Makefile b/fpga/pulp/Makefile index 6460b9b7..364b1dff 100644 --- a/fpga/pulp/Makefile +++ b/fpga/pulp/Makefile @@ -12,6 +12,9 @@ VIVADO ?= vivado all: 
ips ## Generate the bitstream for pulpissimo with vivado in batch mode. The vivado invocation command may be overriden with the env variable VIVADO. $(VIVADO) -mode batch -source tcl/run.tcl +flash: ## Programs the board with the generated bitstream by running tcl/flash_bitstream.tcl with vivado in batch mode. The vivado invocation command may be overridden with the env variable VIVADO. + $(VIVADO) -mode batch -source tcl/flash_bitstream.tcl + gui: ips ## Generates the bitstream for pulpissimo with vivado in GUI mode. The vivado invocation command may be overriden with the env variable VIVADO. $(VIVADO) -mode gui -source tcl/run.tcl & diff --git a/fpga/pulp/tcl/flash_bitstream.tcl b/fpga/pulp/tcl/flash_bitstream.tcl new file mode 100644 index 00000000..e70a8ecd --- /dev/null +++ b/fpga/pulp/tcl/flash_bitstream.tcl @@ -0,0 +1,10 @@ +# Programs the hardware device named by the XILINX_FPGA_DEV environment variable with ../pulp_<BOARD>.bit (BOARD is also read from the environment). +open_hw_manager +connect_hw_server -allow_non_jtag +open_hw_target +set_property PROGRAM.FILE ../pulp_${::env(BOARD)}.bit [get_hw_devices $::env(XILINX_FPGA_DEV)] +current_hw_device [get_hw_devices $::env(XILINX_FPGA_DEV)] +set_property PROBES.FILE {} [get_hw_devices $::env(XILINX_FPGA_DEV)] +set_property FULL_PROBES.FILE {} [get_hw_devices $::env(XILINX_FPGA_DEV)] +program_hw_devices [get_hw_devices $::env(XILINX_FPGA_DEV)] +refresh_hw_device [lindex [get_hw_devices $::env(XILINX_FPGA_DEV)] 0] diff --git a/rtl/tb/tb_pulp.sv b/rtl/tb/tb_pulp.sv index 676692e7..94a068b6 100644 --- a/rtl/tb/tb_pulp.sv +++ b/rtl/tb/tb_pulp.sv @@ -159,6 +159,8 @@ module tb_pulp; jtag_pkg::debug_mode_if_t debug_mode_if = new; pulp_tap_pkg::pulp_tap_if_soc_t pulp_tap = new; + logic bypass_enable; + /* system wires */ // the w_/s_ prefixes are used to mean wire/tri-type and logic-type (respectively) @@ -186,7 +188,7 @@ module tb_pulp; tri w_i2c1_scl; tri w_i2c1_sda; - + wire [7:0] w_hyper_dq0 ; wire [7:0] w_hyper_dq1 ; wire w_hyper_ck ; @@ -664,8 +666,8 @@ module tb_pulp; // GPIO TEST genvar i; //genvar j; - - + + assign w_gpios[16] = w_gpios[0] ? 1'b1 : 1'b0 ; assign w_gpios[17] = w_gpios[1] ? 1'b1 : 1'b0 ; assign w_gpios[18] = w_gpios[2] ? 
1'b1 : 1'b0 ; @@ -778,7 +780,7 @@ module tb_pulp; logic error; int num_err; int rd_cnt; - + automatic logic [9:0] FC_CORE_ID = {5'd31, 5'd0}; int entry_point; @@ -787,7 +789,7 @@ module tb_pulp; error = 1'b0; num_err = 0; rd_cnt=0; - + // read entry point from commandline if ($value$plusargs("ENTRY_POINT=%h", entry_point)) begin_l2_instr = entry_point; @@ -822,9 +824,9 @@ module tb_pulp; jtag_pkg::jtag_reset(s_tck, s_tms, s_trstn, s_tdi); jtag_pkg::jtag_softreset(s_tck, s_tms, s_trstn, s_tdi); #5us; - + s_bootsel= (STIM_FROM=="SPI_FLASH") ? 2'b00 : ( (STIM_FROM=="HYPER_FLASH") ? 2'b10 : 2'b00 ); - + if (STIM_FROM == "HYPER_FLASH") begin $display("[TB] %t - HyperFlash boot: Setting bootsel to 2'b10", $realtime); end else if (STIM_FROM == "SPI_FLASH") begin @@ -835,22 +837,22 @@ module tb_pulp; s_rst_n = 1'b1; debug_mode_if.init_dmi_access(s_tck, s_tms, s_trstn, s_tdi); debug_mode_if.set_dmactive(1'b1, s_tck, s_tms, s_trstn, s_tdi, s_tdo); - #10us; - end - else if (LOAD_L2 == "JTAG") begin + #10us; + end else if (LOAD_L2 == "JTAG" || LOAD_L2 == "FAST_DEBUG_PRELOAD") begin s_bootsel = 2'b01; end - if (LOAD_L2 == "JTAG") begin + if (LOAD_L2 == "JTAG" || LOAD_L2 == "FAST_DEBUG_PRELOAD") begin + if (USE_FLL) - $display("[TB] %t - Using FLL", $realtime); + $display("[TB] %t - Using FLL", $realtime); else - $display("[TB] %t - Not using FLL", $realtime); + $display("[TB] %t - Not using FLL", $realtime); if (USE_SDVT_CPI) - $display("[TB] %t - Using CAM SDVT", $realtime); + $display("[TB] %t - Using CAM SDVT", $realtime); else - $display("[TB] %t - Not using CAM SDVT", $realtime); + $display("[TB] %t - Not using CAM SDVT", $realtime); // read in the stimuli vectors == address_value if ($value$plusargs("stimuli=%s", stimuli_file)) begin @@ -865,7 +867,7 @@ module tb_pulp; // testing on the jtag link jtag_pkg::jtag_reset(s_tck, s_tms, s_trstn, s_tdi); jtag_pkg::jtag_softreset(s_tck, s_tms, s_trstn, s_tdi); - #5us; + #50us; jtag_pkg::jtag_bypass_test(s_tck, s_tms, s_trstn, 
s_tdi, s_tdo); #5us; @@ -949,31 +951,35 @@ module tb_pulp; $stop; end - $display("[TB] %t - Loading L2", $realtime); - if (USE_PULP_BUS_ACCESS) begin - // use pulp tap to load binary - pulp_tap_pkg::load_L2(num_stim, stimuli, s_tck, s_tms, s_trstn, s_tdi, s_tdo); - - end else begin - // use debug module to load binary - debug_mode_if.load_L2(num_stim, stimuli, s_tck, s_tms, s_trstn, s_tdi, s_tdo); - + if (LOAD_L2 == "JTAG") begin + $display("[TB] %t - Loading L2 via JTAG", $realtime); + if (USE_PULP_BUS_ACCESS) begin + // use pulp tap to load binary, put debug module in bypass + pulp_tap_pkg::load_L2(num_stim, stimuli, s_tck, s_tms, s_trstn, s_tdi, s_tdo); + end else begin + // use debug module to load binary + debug_mode_if.load_L2(num_stim, stimuli, s_tck, s_tms, s_trstn, s_tdi, s_tdo); + end + end + else if (LOAD_L2 == "FAST_DEBUG_PRELOAD") begin + $warning("[TB] - Preloading the memory via direct simulator access. \nNEVER EVER USE THIS MODE TO VERIFY THE BOOT BEHAVIOR OF A CHIP. THIS BOOTMODE IS IMPOSSIBLE ON A PHYSICAL CHIP!!!"); + preload_l2(num_stim, stimuli); end - // configure for debug module dmi access again debug_mode_if.init_dmi_access(s_tck, s_tms, s_trstn, s_tdi); // we have set dpc and loaded the binary, we can go now $display("[TB] %t - Resuming the CORE", $realtime); debug_mode_if.resume_harts(s_tck, s_tms, s_trstn, s_tdi, s_tdo); + + bypass_enable = 1'b0; end if (ENABLE_DPI == 1) begin jtag_mux = JTAG_DPI; end - - #500us; + #800us; // Select UART driver/monitor if ($value$plusargs("uart_drv_mon=%s", uart_drv_mon_sel)) begin @@ -1011,7 +1017,7 @@ module tb_pulp; end - + `ifndef USE_NETLIST /* File System access */ logic r_stdout_pready; @@ -1081,6 +1087,93 @@ module tb_pulp; end end +`ifndef USE_NETLIST + + task automatic preload_l2( + input int num_stim, + ref logic [95:0] stimuli [100000:0] + ); + logic more_stim; + static logic [95:0] stim_entry; + more_stim = 1'b1; + $info("Preloading L2 with stimuli through direct access."); + while 
(more_stim == 1'b1) begin + @(posedge i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.clk_i); + stim_entry = stimuli[num_stim]; + force i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug.req = 1'b1; + force i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug.add = stim_entry[95:64]; + force i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug.wdata = stim_entry[31:0]; + force i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug.wen = 1'b0; + force i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug.be = '1; + do begin + @(posedge i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.clk_i); + end while (~i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug.gnt); + force i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug.add = stim_entry[95:64]+4; + force i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug.wdata = stim_entry[63:32]; + do begin + @(posedge i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.clk_i); + end while (~i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug.gnt); + + num_stim = num_stim + 1; + if (num_stim > $size(stimuli) || stimuli[num_stim]===96'bx ) begin // make sure we have more stimuli + more_stim = 0; // if not set variable to 0, will prevent additional stimuli to be applied + break; + end + end // while (more_stim == 1'b1) + release i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug.req; + release i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug.add; + release i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug.wdata; + release i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug.wen; + release i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug.be; + @(posedge i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.clk_i); + $info("Done with direct preloading of L2!"); + endtask // preload_l2 + +`else + + task automatic preload_l2( + input int 
num_stim, + ref logic [95:0] stimuli [100000:0] + ); + logic more_stim; + static logic [95:0] stim_entry; + more_stim = 1'b1; + $info("Preloading L2 with stimuli through direct access."); + while (more_stim == 1'b1) begin + @(posedge i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.clk_i); + stim_entry = stimuli[num_stim]; + force i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug_req = 1'b1; + force i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug_add = stim_entry[95:64]; + force i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug_wdata = stim_entry[31:0]; + force i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug_wen = 1'b0; + force i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug_be = '1; + do begin + @(posedge i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.clk_i); + end while (~i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug_gnt); + force i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug_add = stim_entry[95:64]+4; + force i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug_wdata = stim_entry[63:32]; + do begin + @(posedge i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.clk_i); + end while (~i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug_gnt); + + num_stim = num_stim + 1; + if (num_stim > $size(stimuli) || stimuli[num_stim]===96'bx ) begin // make sure we have more stimuli + more_stim = 0; // if not set variable to 0, will prevent additional stimuli to be applied + break; + end + end // while (more_stim == 1'b1) + release i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug_req; + release i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug_add; + release i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug_wdata; + release i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug_wen; + release i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.tcdm_debug_be; + 
@(posedge i_dut.soc_domain_i.pulp_soc_i.i_soc_interconnect_wrap.clk_i); + $info("Done with direct preloading of L2!"); + endtask // preload_l2 + + +`endif + endmodule // tb_pulp