From cd149e245d5e05c59bb020f1ab1b6e72bbf4f979 Mon Sep 17 00:00:00 2001 From: Randy Hartgrove Date: Thu, 10 Mar 2022 07:24:40 -0800 Subject: [PATCH] HwAccel & Getting Started: updates to Makefiles to support HW builds and runs * updated setup instructions * Update Part4-embedded_platform.md * updated xrt.ini to add native_xrt_trace=true * created test list * updated with "select case" /group/xcoswmktg/randyh/sprite-test-list.txt * Delete randy_techdocs_json_randyh_Vitis-Tutorials.list * Updated host.cpp to support hw build on zcu104 * edited to remove source files which are causing a Makefile error * changed -lpthread to -pthread * updated platform for 2022.1 changed to xilinx_u200_gen3x16_xdma_2_202110_1 * imported Uday's changes from GitHub PR#185 and updated xrt.ini options for 2022.1 * resolved g++ command issues though $XILINX_VIVADO/include does not appear necessary...maybe remove later and also removed the platform_desc.txt file --- .../Vitis/Part4-embedded_platform.md | 7 +- Getting_Started/Vitis/example/src/host.cpp | 2 +- Getting_Started/Vitis/example/u200/Makefile | 12 +- .../Vitis/example/u200/description.json | 2 +- Getting_Started/Vitis/example/zcu102/Makefile | 15 +- .../Vitis/example/zcu102/description.json | 2 +- .../Vitis/example/zcu102/run_hw.sh | 2 - .../Vitis/example/zcu102/run_hw_emu.sh | 2 - .../Vitis/example/zcu102/run_sw_emu.sh | 2 - .../Vitis_HLS/reference-files/Makefile | 4 +- .../reference-files/description.json | 4 +- .../01-convolution-tutorial/Makefile | 2 +- .../01-convolution-tutorial/description.json | 2 +- .../02-bloom/makefile/Makefile | 2 +- .../02-bloom/makefile/common.mk | 2 +- .../02-bloom/makefile/description.json | 4 +- .../02-bloom/makefile/multi_params.json | 6 +- .../hw/Makefile | 5 +- .../build/description.json | 2 +- .../docs/module1_baseline/Makefile | 3 +- .../docs/module1_baseline/description.json | 2 +- .../docs/module2_pipeline/Makefile | 2 +- .../docs/module2_pipeline/description.json | 2 +- 
.../docs/module3_datatype/Makefile | 2 +- .../docs/module3_datatype/description.json | 2 +- .../docs/module4_dataflow/Makefile | 2 +- .../docs/module4_dataflow/description.json | 2 +- .../07-host-code-opt/reference-files/Makefile | 4 +- .../reference-files/description.json | 2 +- .../reference-files/Makefile | 6 +- .../reference-files/description.json | 2 +- .../reference-files/xrt.ini | 2 +- .../reference-files/Makefile | 4 +- .../reference-files/description.json | 2 +- .../reference_files/description.json | 2 +- .../reference-files/Makefile | 4 +- .../reference-files/description.json | 2 +- .../05-using-multiple-cu/README.md | 36 +- .../reference-files/Makefile | 9 +- .../reference-files/description.json | 2 +- .../reference-files/link.cfg | 4 +- .../reference-files/link3.cfg | 1 + .../reference-files/src/host/host-final.cpp | 92 +---- .../reference-files/src/host/host.cpp | 90 +--- .../reference-files/src/host/host_opencv.cpp | 388 ++++++++++++++++++ .../reference-files/xrt.ini | 2 +- .../reference-files/Makefile | 4 +- .../reference-files/description.json | 2 +- .../07-using-hbm/makefile/Makefile | 2 +- .../07-using-hbm/makefile/common.mk | 2 +- .../07-using-hbm/makefile/description.json | 2 +- .../08-using-hostmem/reference-files/Makefile | 2 +- .../reference-files/description.json | 2 +- Jenkinsfile | 4 +- 54 files changed, 499 insertions(+), 271 deletions(-) create mode 100644 Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/src/host/host_opencv.cpp diff --git a/Getting_Started/Vitis/Part4-embedded_platform.md b/Getting_Started/Vitis/Part4-embedded_platform.md index 8153ac45eb..0e4824646b 100644 --- a/Getting_Started/Vitis/Part4-embedded_platform.md +++ b/Getting_Started/Vitis/Part4-embedded_platform.md @@ -29,7 +29,7 @@ ### Setting up the environment -> IMPORTANT: This tutorial requires Vitis 2021.1 or later to run. +> IMPORTANT: This tutorial requires Vitis 2022.1 or later to run. 
*NOTE: The instructions provided below assume that you are running in a bash shell.* @@ -48,11 +48,10 @@ export PLATFORM_REPO_PATHS= export ROOTFS= ``` -* To properly source the cross-compilation SDK, run the `environment-setup-aarch64-xilinx-linux` script in the directory -where you extracted the SDK source. +* To properly source the cross-compilation SDK, run the `environment-setup-cortexa72-cortexa53-xilinx-linux` script in the $ROOTFS directory. ```bash -source /environment-setup-aarch64-xilinx-linux +source $ROOTFS/environment-setup-cortexa72-cortexa53-xilinx-linux ``` *NOTE: The ZYNQMP common image file can be downloaded from the [Vitis Embedded Platforms](https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/embedded-platforms.html) page, and contains the Sysroot, Rootfs, and boot Image for Xilinx Zynq MPSoC devices.* diff --git a/Getting_Started/Vitis/example/src/host.cpp b/Getting_Started/Vitis/example/src/host.cpp index 4b742a6b08..2b8570cb30 100644 --- a/Getting_Started/Vitis/example/src/host.cpp +++ b/Getting_Started/Vitis/example/src/host.cpp @@ -67,7 +67,7 @@ int main(int argc, char **argv) // Create the buffers and allocate memory cl::Buffer in1_buf(context, CL_MEM_READ_ONLY, sizeof(int) * DATA_SIZE, NULL, &err); cl::Buffer in2_buf(context, CL_MEM_READ_ONLY, sizeof(int) * DATA_SIZE, NULL, &err); - cl::Buffer out_buf(context, CL_MEM_WRITE_ONLY, sizeof(int) * DATA_SIZE, NULL, &err); + cl::Buffer out_buf(context, CL_MEM_READ_WRITE, sizeof(int) * DATA_SIZE, NULL, &err); // Map buffers to kernel arguments, thereby assigning them to specific device memory banks krnl_vector_add.setArg(0, in1_buf); diff --git a/Getting_Started/Vitis/example/u200/Makefile b/Getting_Started/Vitis/example/u200/Makefile index e81a2cf444..9345e1cbff 100644 --- a/Getting_Started/Vitis/example/u200/Makefile +++ b/Getting_Started/Vitis/example/u200/Makefile @@ -1,7 +1,7 @@ XF_PROJ_ROOT ?= $(shell bash -c 'export MK_PATH=$(MK_PATH); echo 
$${MK_PATH%/Getting_Started/Vitis/example/src/*}') TARGET := hw_emu -PLATFORM := xilinx_u200_gen3x16_xdma_1_202110_1 +PLATFORM := xilinx_u200_gen3x16_xdma_2_202110_1 SRCDIR := $(XF_PROJ_ROOT)/Getting_Started/Vitis/example/src BUILD_DIR := $(TARGET) @@ -14,18 +14,18 @@ else cd $(BUILD_DIR) && XCL_EMULATION_MODE=$(TARGET) ./app.exe endif -#build: host emconfig xclbin -build: $(BUILD_DIR)/app.exe $(BUILD_DIR)/emconfig.json $(BUILD_DIR)/vadd.xclbin +build: host emconfig xclbin +#build: $(BUILD_DIR)/app.exe $(BUILD_DIR)/emconfig.json $(BUILD_DIR)/vadd.xclbin host: $(BUILD_DIR)/app.exe -$(BUILD_DIR)/app.exe: $(SRCDIR)/host.cpp +$(BUILD_DIR)/app.exe: mkdir -p $(BUILD_DIR) g++ -Wall -g -std=c++11 $(SRCDIR)/host.cpp -o $(BUILD_DIR)/app.exe \ -I${XILINX_XRT}/include/ \ - -L${XILINX_XRT}/lib/ -lOpenCL -lpthread -lrt -lstdc++ + -L${XILINX_XRT}/lib/ -lOpenCL -pthread -lrt -lstdc++ xo: $(BUILD_DIR)/vadd.xo -$(BUILD_DIR)/vadd.xo: $(SRCDIR)/vadd.cpp +$(BUILD_DIR)/vadd.xo: v++ -c -t ${TARGET} --platform $(PLATFORM) --config $(SRCDIR)/u200.cfg -k vadd -I$(SRCDIR) $(SRCDIR)/vadd.cpp -o $(BUILD_DIR)/vadd.xo xclbin: $(BUILD_DIR)/vadd.xclbin diff --git a/Getting_Started/Vitis/example/u200/description.json b/Getting_Started/Vitis/example/u200/description.json index 3bd5ddb977..92135078a8 100644 --- a/Getting_Started/Vitis/example/u200/description.json +++ b/Getting_Started/Vitis/example/u200/description.json @@ -3,7 +3,7 @@ "description": "getting-started-with-vitis", "flow": "vitis", - "platform_whitelist": ["u200"], + "platform_allowlist": ["u200"], "testinfo": { "jobs": [ diff --git a/Getting_Started/Vitis/example/zcu102/Makefile b/Getting_Started/Vitis/example/zcu102/Makefile index 875b50aa86..83dfe66198 100644 --- a/Getting_Started/Vitis/example/zcu102/Makefile +++ b/Getting_Started/Vitis/example/zcu102/Makefile @@ -1,13 +1,13 @@ ndef = $(if $(value $(1)),,$(error $(1) must be set prior to running)) XF_PROJ_ROOT ?= $(shell bash -c 'export MK_PATH=$(MK_PATH); echo 
$${MK_PATH%/Getting_Started/Vitis/example/src/*}') +CXX := $(XILINX_VITIS)/gnu/aarch64/lin/aarch64-linux/bin/aarch64-linux-gnu-g++ TARGET := hw_emu -PLATFORM := xilinx_zcu102_base_202120_1 +PLATFORM := xilinx_zcu102_base_202210_1 SRCDIR := $(XF_PROJ_ROOT)/Getting_Started/Vitis/example/src BUILD_DIR := $(TARGET) EMBEDDED_EXEC_SCRIPT := run_$(TARGET).sh -EMBEDDED_PACKAGE_OUT := $(BUILD_DIR)package/sd_card.img SDKTARGETSYSROOT := $(SYSROOT) ROOTFS ?= $(EDGE_COMMON_SW) @@ -20,8 +20,8 @@ else cd $(BUILD_DIR)/package && launch_$(TARGET).sh -no-reboot -run-app $(EMBEDDED_EXEC_SCRIPT) endif -#build: host emconfig xclbin -build: $(BUILD_DIR)/app.exe $(BUILD_DIR)/vadd.xclbin $(BUILD_DIR)/package/sd_card.img +build: host xclbin package +#build: $(BUILD_DIR)/app.exe $(BUILD_DIR)/vadd.xclbin $(BUILD_DIR)/package/sd_card.img host: $(BUILD_DIR)/app.exe $(BUILD_DIR)/app.exe: @@ -29,9 +29,10 @@ $(BUILD_DIR)/app.exe: mkdir -p $(BUILD_DIR) cp xrt.ini $(BUILD_DIR) cp run_$(TARGET).sh $(BUILD_DIR) - $(CXX) -Wall -g -std=c++11 $(SRCDIR)/host.cpp -o $(BUILD_DIR)/app.exe \ - -I/usr/include/xrt \ - -lOpenCL -lpthread -lrt -lstdc++ + $(CXX) $(SRCDIR)/host.cpp -o $(BUILD_DIR)/app.exe -Wall -O0 -g -std=c++1y -fmessage-length=0 \ + -I$(SDKTARGETSYSROOT)/usr/include/xrt -L$(SDKTARGETSYSROOT)/usr/lib \ + -I$(XILINX_VIVADO)/include \ + -lOpenCL -pthread -lrt -lstdc++ --sysroot=$(SDKTARGETSYSROOT) xo: $(BUILD_DIR)/vadd.xo $(BUILD_DIR)/vadd.xo: diff --git a/Getting_Started/Vitis/example/zcu102/description.json b/Getting_Started/Vitis/example/zcu102/description.json index c7174eeacf..43eb50a176 100644 --- a/Getting_Started/Vitis/example/zcu102/description.json +++ b/Getting_Started/Vitis/example/zcu102/description.json @@ -3,7 +3,7 @@ "description": "getting-started-with-vitis", "flow": "vitis", - "platform_whitelist": ["zcu102"], + "platform_allowlist": ["zcu102"], "testinfo": { "disable": 0, diff --git a/Getting_Started/Vitis/example/zcu102/run_hw.sh b/Getting_Started/Vitis/example/zcu102/run_hw.sh 
index 0d183d5a41..c5de631bac 100644 --- a/Getting_Started/Vitis/example/zcu102/run_hw.sh +++ b/Getting_Started/Vitis/example/zcu102/run_hw.sh @@ -8,8 +8,6 @@ mount /dev/mmcblk0p1 /mnt cd /mnt -cp platform_desc.txt /etc/xocl.txt - export XILINX_XRT=/usr export XILINX_VITIS=/mnt diff --git a/Getting_Started/Vitis/example/zcu102/run_hw_emu.sh b/Getting_Started/Vitis/example/zcu102/run_hw_emu.sh index 48a3ca98ef..714cf98bc5 100644 --- a/Getting_Started/Vitis/example/zcu102/run_hw_emu.sh +++ b/Getting_Started/Vitis/example/zcu102/run_hw_emu.sh @@ -7,8 +7,6 @@ mount /dev/mmcblk0p1 /mnt cd /mnt -cp platform_desc.txt /etc/xocl.txt - export XILINX_XRT=/usr export XILINX_VITIS=/mnt export XCL_EMULATION_MODE=hw_emu diff --git a/Getting_Started/Vitis/example/zcu102/run_sw_emu.sh b/Getting_Started/Vitis/example/zcu102/run_sw_emu.sh index bab2350bff..281e568d99 100644 --- a/Getting_Started/Vitis/example/zcu102/run_sw_emu.sh +++ b/Getting_Started/Vitis/example/zcu102/run_sw_emu.sh @@ -7,8 +7,6 @@ mount /dev/mmcblk0p1 /mnt cd /mnt -cp platform_desc.txt /etc/xocl.txt - export XILINX_XRT=/usr export XILINX_VITIS=/mnt export XCL_EMULATION_MODE=sw_emu diff --git a/Getting_Started/Vitis_HLS/reference-files/Makefile b/Getting_Started/Vitis_HLS/reference-files/Makefile index 4ade27b7da..5c4007a562 100644 --- a/Getting_Started/Vitis_HLS/reference-files/Makefile +++ b/Getting_Started/Vitis_HLS/reference-files/Makefile @@ -19,13 +19,13 @@ help: @echo "" ####################################################################################### TARGET := sw_emu -PLATFORM := xilinx_u200_gen3x16_xdma_1_202110_1 +PLATFORM := xilinx_u200_gen3x16_xdma_2_202110_1 HOST_EXE := dct_top XO := dct.$(TARGET).$(PLATFORM).xo XCLBIN := dct.$(TARGET).$(PLATFORM).xclbin # Host building global settings -CXXFLAGS := -I$(XILINX_XRT)/include/ -I$(XILINX_VIVADO)/include/ -Wall -O0 -g -std=c++11 -L$(XILINX_XRT)/lib/ -lpthread -lrt -lstdc++ +CXXFLAGS := -I$(XILINX_XRT)/include/ -I$(XILINX_VIVADO)/include/ -Wall -O0 -g 
-std=c++11 -L$(XILINX_XRT)/lib/ -pthread -lrt -lstdc++ CXXFLAGS2 := -lOpenCL # Kernel compiler & linker global settings diff --git a/Getting_Started/Vitis_HLS/reference-files/description.json b/Getting_Started/Vitis_HLS/reference-files/description.json index 660ec6e5ce..ab90f80843 100644 --- a/Getting_Started/Vitis_HLS/reference-files/description.json +++ b/Getting_Started/Vitis_HLS/reference-files/description.json @@ -3,8 +3,8 @@ "description": "Getting Started with Vitis HLS", "flow": "vitis", - "platform_whitelist": ["u200"], - "platform_blacklist": ["u280"], + "platform_allowlist": ["u200"], + "platform_blocklist": ["u280"], "testinfo": { "jobs": [ diff --git a/Hardware_Acceleration/Design_Tutorials/01-convolution-tutorial/Makefile b/Hardware_Acceleration/Design_Tutorials/01-convolution-tutorial/Makefile index 25bccc9179..85b41a8c07 100755 --- a/Hardware_Acceleration/Design_Tutorials/01-convolution-tutorial/Makefile +++ b/Hardware_Acceleration/Design_Tutorials/01-convolution-tutorial/Makefile @@ -103,7 +103,7 @@ CXXLDFLAGS := -L$(XILINX_XRT)/lib/ ifneq ($(INPUT_TYPE),random) CXXLDFLAGS += -L$(OPENCV_LIB)/ endif -CXXLDFLAGS += -lOpenCL -lpthread -lrt -lstdc++ -lxilinxopencl -fopenmp +CXXLDFLAGS += -lOpenCL -pthread -lrt -lstdc++ -lxilinxopencl -fopenmp ifneq ($(INPUT_TYPE),random) CXXLDFLAGS += -Wl,-rpath=$(OPENCV_LIB)/ -lopencv_core -lopencv_highgui endif diff --git a/Hardware_Acceleration/Design_Tutorials/01-convolution-tutorial/description.json b/Hardware_Acceleration/Design_Tutorials/01-convolution-tutorial/description.json index 23e0fea8e7..6eaa41b5a1 100644 --- a/Hardware_Acceleration/Design_Tutorials/01-convolution-tutorial/description.json +++ b/Hardware_Acceleration/Design_Tutorials/01-convolution-tutorial/description.json @@ -3,7 +3,7 @@ "description": "01-convolution-tutorial", "flow": "vitis", - "platform_whitelist": ["u200"], + "platform_allowlist": ["u200"], "testinfo": { "tasks": { diff --git 
a/Hardware_Acceleration/Design_Tutorials/02-bloom/makefile/Makefile b/Hardware_Acceleration/Design_Tutorials/02-bloom/makefile/Makefile index 3e2eabf504..3877659738 100644 --- a/Hardware_Acceleration/Design_Tutorials/02-bloom/makefile/Makefile +++ b/Hardware_Acceleration/Design_Tutorials/02-bloom/makefile/Makefile @@ -20,7 +20,7 @@ ifeq ($(STEP), kernel_16) PF := 16 endif -PLATFORM :=xilinx_u200_gen3x16_xdma_1_202110_1 +PLATFORM :=xilinx_u200_gen3x16_xdma_2_202110_1 #SRCDIR := ./../reference_files ifeq ($(SPRITE),true) diff --git a/Hardware_Acceleration/Design_Tutorials/02-bloom/makefile/common.mk b/Hardware_Acceleration/Design_Tutorials/02-bloom/makefile/common.mk index 963a5cbd34..8b0c666797 100644 --- a/Hardware_Acceleration/Design_Tutorials/02-bloom/makefile/common.mk +++ b/Hardware_Acceleration/Design_Tutorials/02-bloom/makefile/common.mk @@ -54,7 +54,7 @@ host: $(SRCDIR)/*.cpp $(SRCDIR)/*.c $(SRCDIR)/*.h -O3 -Wall -fmessage-length=0 -std=c++11\ $(HOST_SRC_CPP) \ -L$(XILINX_XRT)/lib/ \ - -lxilinxopencl -lpthread -lrt \ + -lxilinxopencl -pthread -lrt \ -o $(BUILDDIR)/host emconfig.json: diff --git a/Hardware_Acceleration/Design_Tutorials/02-bloom/makefile/description.json b/Hardware_Acceleration/Design_Tutorials/02-bloom/makefile/description.json index 772e853618..cbdf183001 100644 --- a/Hardware_Acceleration/Design_Tutorials/02-bloom/makefile/description.json +++ b/Hardware_Acceleration/Design_Tutorials/02-bloom/makefile/description.json @@ -3,7 +3,7 @@ "description": "02-bloom", "flow": "vitis", - "platform_whitelist": ["u200"], + "platform_allowlist": ["u200"], "testinfo": { "jobs": [ @@ -23,7 +23,7 @@ "vitis_hw_build", "vitis_hw_run" ], - "make_options" : [ "SPRITE='true'", "TARGET='$user_test_mode'", "XF_PROJ_ROOT='${GITHUB_LIBS_REPO}/Vitis-Tutorials'", "PLATFORM_REPO_PATHS='$PLATFORM_PATH'", "SpriteEn=1" ], + "make_options" : [ "SPRITE='true'", "TARGET='$user_test_mode'", "XF_PROJ_ROOT='${GITHUB_LIBS_REPO}/Vitis-Tutorials'", 
"PLATFORM_REPO_PATHS='$PLATFORM_PATH'", "SpriteEnable=1" ], "category": "canary", "custom_build_target": { "all": "run", diff --git a/Hardware_Acceleration/Design_Tutorials/02-bloom/makefile/multi_params.json b/Hardware_Acceleration/Design_Tutorials/02-bloom/makefile/multi_params.json index fd8443498e..f2ecbe6df0 100644 --- a/Hardware_Acceleration/Design_Tutorials/02-bloom/makefile/multi_params.json +++ b/Hardware_Acceleration/Design_Tutorials/02-bloom/makefile/multi_params.json @@ -4,19 +4,19 @@ "STEP": "split_buffer", "ITER": "2", "PF": "8", - "SpriteEn": "1" + "SpriteEnable": "1" }, "sw_overlap": { "STEP": "sw_overlap", "ITER": "8", "PF": "8", - "SpriteEn": "1" + "SpriteEnable": "1" }, "multiDDR": { "STEP": "multiDDR", "ITER": "8", "PF": "8", - "SpriteEn": "1" + "SpriteEnable": "1" } } } diff --git a/Hardware_Acceleration/Design_Tutorials/03-rtl_stream_kernel_integration/hw/Makefile b/Hardware_Acceleration/Design_Tutorials/03-rtl_stream_kernel_integration/hw/Makefile index 3889614872..c5402ae614 100755 --- a/Hardware_Acceleration/Design_Tutorials/03-rtl_stream_kernel_integration/hw/Makefile +++ b/Hardware_Acceleration/Design_Tutorials/03-rtl_stream_kernel_integration/hw/Makefile @@ -33,9 +33,8 @@ help:: $(ECHO) "--------------------------------------------------------------------------------------------------------------------------------" $(ECHO) "" $(ECHO) "Supported Platform (platform_name):" - $(ECHO) " xilinx_u200_xdma_201830_2" - $(ECHO) " xilinx_u200_gen3x16_xdma_1_1_202020_1" - $(ECHO) " xilinx_u250_xdma_201830_2" + $(ECHO) " xilinx_u200_gen3x16_xdma_2_202110_1" + $(ECHO) " xilinx_u250_gen3x16_xdma_2_1_202010_1" $(ECHO) " xilinx_u250_gen3x16_xdma_3_1_202020_1" $(ECHO) " xilinx_u50_gen3x16_xdma_201920_3" $(ECHO) " xilinx_u280_xdma_201920_3" diff --git a/Hardware_Acceleration/Design_Tutorials/04-traveling-salesperson/build/description.json b/Hardware_Acceleration/Design_Tutorials/04-traveling-salesperson/build/description.json index fed894c2a3..6d08558edf 
100644 --- a/Hardware_Acceleration/Design_Tutorials/04-traveling-salesperson/build/description.json +++ b/Hardware_Acceleration/Design_Tutorials/04-traveling-salesperson/build/description.json @@ -3,7 +3,7 @@ "description": "04-traveling-salesperson", "flow": "hls", - "platform_whitelist": ["u200"], + "platform_allowlist": ["u200"], "testinfo": { "disable": 1, diff --git a/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module1_baseline/Makefile b/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module1_baseline/Makefile index a66d3df0d7..8ea85a1947 100644 --- a/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module1_baseline/Makefile +++ b/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module1_baseline/Makefile @@ -35,7 +35,6 @@ help:: PLATFORM := xilinx_u50_gen3x16_xdma_201920_3 - ## TARGET can be set as: ## sw_emu: software emulation ## hw_emu: hardware Emulation @@ -85,7 +84,7 @@ CXXFLAGS += -O2 -g -Wall -fmessage-length=0 -std=c++0x CXXLDFLAGS := -L$(XILINX_XRT)/lib/ #CXXLDFLAGS += -lxilinxopencl -lpthread -lrt -lstdc++ -CXXLDFLAGS += -lOpenCL -lpthread -lrt -lstdc++ +CXXLDFLAGS += -lOpenCL -pthread -lrt -lstdc++ ## Kernel Compiler and Linker Flags diff --git a/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module1_baseline/description.json b/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module1_baseline/description.json index f310d876ce..4cf27e3928 100644 --- a/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module1_baseline/description.json +++ b/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module1_baseline/description.json @@ -3,7 +3,7 @@ "description": "06-cholesky-accel-module1", "flow": "vitis", - "platform_whitelist": ["u50"], + 
"platform_allowlist": ["u50"], "testinfo": { "jobs": [ diff --git a/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module2_pipeline/Makefile b/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module2_pipeline/Makefile index a66d3df0d7..75b7cc59f9 100644 --- a/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module2_pipeline/Makefile +++ b/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module2_pipeline/Makefile @@ -85,7 +85,7 @@ CXXFLAGS += -O2 -g -Wall -fmessage-length=0 -std=c++0x CXXLDFLAGS := -L$(XILINX_XRT)/lib/ #CXXLDFLAGS += -lxilinxopencl -lpthread -lrt -lstdc++ -CXXLDFLAGS += -lOpenCL -lpthread -lrt -lstdc++ +CXXLDFLAGS += -lOpenCL -pthread -lrt -lstdc++ ## Kernel Compiler and Linker Flags diff --git a/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module2_pipeline/description.json b/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module2_pipeline/description.json index d9905edc33..d931af6d5f 100644 --- a/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module2_pipeline/description.json +++ b/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module2_pipeline/description.json @@ -3,7 +3,7 @@ "description": "06-cholesky-accel-module2", "flow": "vitis", - "platform_whitelist": ["u50"], + "platform_allowlist": ["u50"], "testinfo": { "jobs": [ diff --git a/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module3_datatype/Makefile b/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module3_datatype/Makefile index a66d3df0d7..75b7cc59f9 100644 --- a/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module3_datatype/Makefile +++ 
b/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module3_datatype/Makefile @@ -85,7 +85,7 @@ CXXFLAGS += -O2 -g -Wall -fmessage-length=0 -std=c++0x CXXLDFLAGS := -L$(XILINX_XRT)/lib/ #CXXLDFLAGS += -lxilinxopencl -lpthread -lrt -lstdc++ -CXXLDFLAGS += -lOpenCL -lpthread -lrt -lstdc++ +CXXLDFLAGS += -lOpenCL -pthread -lrt -lstdc++ ## Kernel Compiler and Linker Flags diff --git a/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module3_datatype/description.json b/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module3_datatype/description.json index c148a7bfe1..b39f6c7c3b 100644 --- a/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module3_datatype/description.json +++ b/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module3_datatype/description.json @@ -3,7 +3,7 @@ "description": "06-cholesky-accel-module3", "flow": "vitis", - "platform_whitelist": ["u50"], + "platform_allowlist": ["u50"], "testinfo": { "jobs": [ diff --git a/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module4_dataflow/Makefile b/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module4_dataflow/Makefile index a66d3df0d7..75b7cc59f9 100644 --- a/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module4_dataflow/Makefile +++ b/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module4_dataflow/Makefile @@ -85,7 +85,7 @@ CXXFLAGS += -O2 -g -Wall -fmessage-length=0 -std=c++0x CXXLDFLAGS := -L$(XILINX_XRT)/lib/ #CXXLDFLAGS += -lxilinxopencl -lpthread -lrt -lstdc++ -CXXLDFLAGS += -lOpenCL -lpthread -lrt -lstdc++ +CXXLDFLAGS += -lOpenCL -pthread -lrt -lstdc++ ## Kernel Compiler and Linker Flags diff --git 
a/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module4_dataflow/description.json b/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module4_dataflow/description.json index 8a9ffd9b6e..d774aa7044 100644 --- a/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module4_dataflow/description.json +++ b/Hardware_Acceleration/Design_Tutorials/06-cholesky-accel/03-Algorithm_Acceleration/docs/module4_dataflow/description.json @@ -3,7 +3,7 @@ "description": "06-cholesky-accel-module4", "flow": "vitis", - "platform_whitelist": ["u50"], + "platform_allowlist": ["u50"], "testinfo": { "jobs": [ diff --git a/Hardware_Acceleration/Design_Tutorials/07-host-code-opt/reference-files/Makefile b/Hardware_Acceleration/Design_Tutorials/07-host-code-opt/reference-files/Makefile index 0379b73615..26aec7761e 100644 --- a/Hardware_Acceleration/Design_Tutorials/07-host-code-opt/reference-files/Makefile +++ b/Hardware_Acceleration/Design_Tutorials/07-host-code-opt/reference-files/Makefile @@ -11,9 +11,7 @@ SIZE := 14 TARGET := hw_emu #TARGETS := hw_emu #TARGET := $(TARGETS) -DEVICE := xilinx_u200_gen3x16_xdma_1_202110_1 -#DEVICES := xilinx_u200_gen3x16_xdma_1_202110_1 -#DEVICE := $(DEVICES) +DEVICE := xilinx_u200_gen3x16_xdma_2_202110_1 LAB := pipeline #BUILDIR := $(LAB) XCLBIN := ./xclbin diff --git a/Hardware_Acceleration/Design_Tutorials/07-host-code-opt/reference-files/description.json b/Hardware_Acceleration/Design_Tutorials/07-host-code-opt/reference-files/description.json index 76c2dc3359..7b48a7396b 100644 --- a/Hardware_Acceleration/Design_Tutorials/07-host-code-opt/reference-files/description.json +++ b/Hardware_Acceleration/Design_Tutorials/07-host-code-opt/reference-files/description.json @@ -3,7 +3,7 @@ "description": "07-host-code-opt", "flow": "vitis", - "platform_whitelist": ["u200"], + "platform_allowlist": ["u200"], "testinfo": { "jobs": [ diff --git 
a/Hardware_Acceleration/Feature_Tutorials/01-rtl_kernel_workflow/reference-files/Makefile b/Hardware_Acceleration/Feature_Tutorials/01-rtl_kernel_workflow/reference-files/Makefile index c24e4365ac..41cdbf8fb6 100644 --- a/Hardware_Acceleration/Feature_Tutorials/01-rtl_kernel_workflow/reference-files/Makefile +++ b/Hardware_Acceleration/Feature_Tutorials/01-rtl_kernel_workflow/reference-files/Makefile @@ -26,7 +26,7 @@ ABS_COMMON_REPO = $(shell readlink -f $(COMMON_REPO)) HOST := user TARGET := hw_emu #TARGET := $(TARGETS) -DEVICE := xilinx_u200_gen3x16_xdma_1_202110_1 +DEVICE := xilinx_u200_gen3x16_xdma_2_202110_1 XCLBIN := ./xclbin XO := ./xo @@ -51,8 +51,8 @@ ECHO:= @echo ###################################################################### host_CXXFLAGS += -g -I./ -I$(XILINX_XRT)/include -I$(XILINX_VIVADO)/include -Wall -O0 -g -std=c++1y # The below are linking flags for C++ Comnpiler -opencl_LDFLAGS += -L$(XILINX_XRT)/lib -lOpenCL -lpthread -xrt_LDFLAGS += -L$(XILINX_XRT)/lib -lxrt_coreutil -lpthread +opencl_LDFLAGS += -L$(XILINX_XRT)/lib -lOpenCL -pthread +xrt_LDFLAGS += -L$(XILINX_XRT)/lib -lxrt_coreutil -pthread CXXFLAGS += $(host_CXXFLAGS) diff --git a/Hardware_Acceleration/Feature_Tutorials/01-rtl_kernel_workflow/reference-files/description.json b/Hardware_Acceleration/Feature_Tutorials/01-rtl_kernel_workflow/reference-files/description.json index 64bda2c2b3..81ef6be968 100644 --- a/Hardware_Acceleration/Feature_Tutorials/01-rtl_kernel_workflow/reference-files/description.json +++ b/Hardware_Acceleration/Feature_Tutorials/01-rtl_kernel_workflow/reference-files/description.json @@ -3,7 +3,7 @@ "description": "01-rtl_kernel_workflow", "flow": "vitis", - "platform_whitelist": ["u200"], + "platform_allowlist": ["u200"], "testinfo": { "jobs": [ diff --git a/Hardware_Acceleration/Feature_Tutorials/01-rtl_kernel_workflow/reference-files/xrt.ini b/Hardware_Acceleration/Feature_Tutorials/01-rtl_kernel_workflow/reference-files/xrt.ini index 102323f08b..025b1e0543 
100644 --- a/Hardware_Acceleration/Feature_Tutorials/01-rtl_kernel_workflow/reference-files/xrt.ini +++ b/Hardware_Acceleration/Feature_Tutorials/01-rtl_kernel_workflow/reference-files/xrt.ini @@ -1,2 +1,2 @@ [Debug] -opencl_summary=true +native_xrt_trace=true diff --git a/Hardware_Acceleration/Feature_Tutorials/02-mixing-c-rtl-kernels/reference-files/Makefile b/Hardware_Acceleration/Feature_Tutorials/02-mixing-c-rtl-kernels/reference-files/Makefile index f19c22e701..24705fa0ee 100644 --- a/Hardware_Acceleration/Feature_Tutorials/02-mixing-c-rtl-kernels/reference-files/Makefile +++ b/Hardware_Acceleration/Feature_Tutorials/02-mixing-c-rtl-kernels/reference-files/Makefile @@ -16,7 +16,7 @@ help: @echo "" ####################################################################################### TARGET := hw_emu -DEVICE := xilinx_u200_gen3x16_xdma_1_202110_1 +DEVICE := xilinx_u200_gen3x16_xdma_2_202110_1 LAB := run1 EXECUTABLE := host XO := krnl_vadd.$(TARGET).$(DEVICE).xo @@ -25,7 +25,7 @@ XCLBIN := krnl_vadd.$(TARGET).$(DEVICE).xclbin RTL_KRNL := ./src/rtl_kernel/rtl_kernel_wizard_0.xo # Host building global settings -CXXFLAGS := -I$(XILINX_XRT)/include/ -I$(XILINX_VIVADO)/include/ -Wall -O0 -g -std=c++11 -L$(XILINX_XRT)/lib/ -lOpenCL -lpthread -lrt -lstdc++ +CXXFLAGS := -I$(XILINX_XRT)/include/ -I$(XILINX_VIVADO)/include/ -Wall -O0 -g -std=c++11 -L$(XILINX_XRT)/lib/ -lOpenCL -pthread -lrt -lstdc++ CXXFLAGS2 := -lOpenCL # Kernel compiler & linker global settings diff --git a/Hardware_Acceleration/Feature_Tutorials/02-mixing-c-rtl-kernels/reference-files/description.json b/Hardware_Acceleration/Feature_Tutorials/02-mixing-c-rtl-kernels/reference-files/description.json index 58bbc1fb5a..e3e15caeae 100644 --- a/Hardware_Acceleration/Feature_Tutorials/02-mixing-c-rtl-kernels/reference-files/description.json +++ b/Hardware_Acceleration/Feature_Tutorials/02-mixing-c-rtl-kernels/reference-files/description.json @@ -3,7 +3,7 @@ "description": "02-mixing-c-rtl-kernels", "flow": 
"vitis", - "platform_whitelist": ["u200"], + "platform_allowlist": ["u200"], "testinfo": { "jobs": [ diff --git a/Hardware_Acceleration/Feature_Tutorials/03-dataflow_debug_and_optimization/reference_files/description.json b/Hardware_Acceleration/Feature_Tutorials/03-dataflow_debug_and_optimization/reference_files/description.json index 8e1d26ecd7..d3ca7b83a6 100644 --- a/Hardware_Acceleration/Feature_Tutorials/03-dataflow_debug_and_optimization/reference_files/description.json +++ b/Hardware_Acceleration/Feature_Tutorials/03-dataflow_debug_and_optimization/reference_files/description.json @@ -3,7 +3,7 @@ "description": "03-dataflow_debug_and_optimization", "flow": "hls", - "platform_whitelist": ["u200"], + "platform_allowlist": ["u200"], "testinfo": { "disable": false, diff --git a/Hardware_Acceleration/Feature_Tutorials/04-mult-ddr-banks/reference-files/Makefile b/Hardware_Acceleration/Feature_Tutorials/04-mult-ddr-banks/reference-files/Makefile index 9376552eee..e07b9860be 100644 --- a/Hardware_Acceleration/Feature_Tutorials/04-mult-ddr-banks/reference-files/Makefile +++ b/Hardware_Acceleration/Feature_Tutorials/04-mult-ddr-banks/reference-files/Makefile @@ -2,7 +2,7 @@ VPP := $(XILINX_VITIS)/bin/v++ EMCONFIGUTIL := $(XILINX_VITIS)/bin/emconfigutil TARGET := hw_emu LAB := run1 -PLATFORM := xilinx_u200_gen3x16_xdma_1_202110_1 +PLATFORM := xilinx_u200_gen3x16_xdma_2_202110_1 # sources KERNEL_SRC := src/vadd.cpp @@ -25,7 +25,7 @@ endif VPP_COMMON_OPTS := -s -t $(TARGET) --platform $(PLATFORM) CFLAGS := -g -std=c++11 -I$(XILINX_XRT)/include -LFLAGS := -L$(XILINX_XRT)/lib -lxilinxopencl -lpthread -lrt +LFLAGS := -L$(XILINX_XRT)/lib -lxilinxopencl -pthread -lrt NUMDEVICES := 1 # run time args diff --git a/Hardware_Acceleration/Feature_Tutorials/04-mult-ddr-banks/reference-files/description.json b/Hardware_Acceleration/Feature_Tutorials/04-mult-ddr-banks/reference-files/description.json index f2fa94f03d..b7a914c8f1 100644 --- 
a/Hardware_Acceleration/Feature_Tutorials/04-mult-ddr-banks/reference-files/description.json +++ b/Hardware_Acceleration/Feature_Tutorials/04-mult-ddr-banks/reference-files/description.json @@ -3,7 +3,7 @@ "description": "04-mult-ddr-banks", "flow": "vitis", - "platform_whitelist": ["u200"], + "platform_allowlist": ["u200"], "testinfo": { "jobs": [ diff --git a/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/README.md b/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/README.md index 285daedd2b..9f7d0143b9 100644 --- a/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/README.md +++ b/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/README.md @@ -1,6 +1,6 @@  - @@ -31,14 +31,16 @@ During this tutorial, you will: 3. Alter the kernel linking process to create multiple CUs of the same kernel. 4. Re-run the hardware emulation and confirm the parallel execution of the CUs. -This tutorial uses an image filter example to demonstrate the multiple CU feature. The host application processes the image, extracts Y, U, and V planes, and then runs the kernel three times to filter each plane of an image. By default, these three kernels run sequentially, using the same hardware resources because the FPGA only contains a single CU of the kernel. This tutorial demonstrates how to increase the number of CU, and then executing the kernel runs in parallel. +This tutorial uses an image filter example to demonstrate the multiple CU feature. To keep the tutorial design simple, the host application uses random data for the pixels instead of a real image, and runs the kernel three times to filter the Y, U, and V planes. By default, these three kernel runs execute sequentially, using the same hardware resources, because the FPGA only contains a single CU of the kernel. This tutorial demonstrates how to increase the number of CUs and then execute the kernel runs in parallel. 
+ +An OpenCV version of the host code is also provided in the source code directory as `src/host/host_opencv.cpp`; however, instructions for using it are not provided in this tutorial. The OpenCV version of the host code can be used after installing the OpenCV library and making the necessary OpenCV-related changes to the `Makefile`. ## Before You Begin This tutorial uses: * BASH Linux shell commands -* 2020.2 Vitis core development kit release and the *xilinx_u200_gen3x16_xdma_1_202110_1* platform. +* 2021.2 Vitis core development kit release and the *xilinx_u200_gen3x16_xdma_1_202110_1* platform. If necessary, it can be easily extended to other versions and platforms. >**IMPORTANT:** @@ -46,10 +48,6 @@ If necessary, it can be easily extended to other versions and platforms. >* Before to running any of the examples, make sure you have installed the Vitis core development kit as described in [Installation](https://www.xilinx.com/html_docs/xilinx2021_1/vitis_doc/acceleration_installation.html#vhc1571429852245) in the Application Acceleration Development flow of the Vitis Unified Software Platform Documentation (UG1416). >* If you run applications on Xilinx® Alveo™ Data Center accelerator cards, ensure the card and software drivers have been correctly installed by following the instructions on the [Alveo Portfolio page](https://www.xilinx.com/products/boards-and-kits/alveo.html). ->* This tutorial module contains a pre-compiled OpenCV™ library compiled by gcc-6.2.0, and requires gcc/g++ version 5.5 at least, or will return an error during host code compilation. 
You must also set the LD_LIBRARY_PATH using the following code to pick the required runtime library related to gcc-6: -> ``` -> export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$XILINX_VITIS/lib/lnx64.o/Default -> ``` ### Accessing the Tutorial Reference Files @@ -63,13 +61,7 @@ You can observe the Makefile used for this tutorial in `reference-files/Makefile * **VPP**: Vitis compiler path to compile the kernel code. * **EMCONFIGUTIL**: The path of the utility that creates emulation configuration file, `emconfig.json`. * **DEVICE**: The target platform. -* **LFLAGS**: The linker option using the OpenCV library for the host code linker. - - ``` - -Wl,-rpath,./opencv/opencv_gcc -L./opencv/opencv_gcc -lopencv_core -lopencv_highgui - ``` - -* **EXE_OPT**: The runtime options passed as command line arguments: Compiled kernel `xclbin` file, input image. +* **EXE_OPT**: The runtime options passed as command line arguments: Compiled kernel `xclbin` file. ### Run Hardware Emulation @@ -87,7 +79,7 @@ For hardware emulation (`hw_emu`), the kernel code is compiled into a hardware m 1. While the emulation run is executing, in another terminal, open the `src/host/host.cpp` file. -2. Inspect lines 255-257. You can see that the Filter function is called three times for the Y, U, and V channels. +2. Inspect lines 234-236. You can see that the Filter function is called three times for the Y, U, and V channels. ``` request[xx*3+0] = Filter(coeff.data(), y_src.data(), width, height, stride, y_dst.data()); @@ -95,7 +87,7 @@ For hardware emulation (`hw_emu`), the kernel code is compiled into a hardware m request[xx*3+2] = Filter(coeff.data(), v_src.data(), width, height, stride, v_dst.data()); ``` - This function is described from line 80. Here, you can see kernel arguments are set, and the kernel is executed by the `clEnqueueTask` command. + This function is described from line 78. Here, you can see kernel arguments are set, and the kernel is executed by the `clEnqueueTask` command. 
``` // Set the kernel arguments @@ -113,7 +105,7 @@ For hardware emulation (`hw_emu`), the kernel code is compiled into a hardware m clEnqueueTask(mQueue, mKernel, 1, &req->mEvent[0], &req->mEvent[1]); ``` - All three `clEnqueueTask` commands are enqueued using a single in-order command queue (line 75). As a result, all the commands are executed sequentially in the order they are added to the queue. + All three `clEnqueueTask` commands are enqueued using a single in-order command queue (line 73). As a result, all the commands are executed sequentially in the order they are added to the queue. ``` Filter2DDispatcher( @@ -122,7 +114,7 @@ For hardware emulation (`hw_emu`), the kernel code is compiled into a hardware m cl_program &Program ) { mKernel = clCreateKernel(Program, "Filter2DKernel", &mErr); - mQueue = clCreateCommandQueue(Context, Device, CL_QUEUE_PROFI LING_ENABLE, &mErr); + mQueue = clCreateCommandQueue(Context, Device, CL_QUEUE_PROFILING_ENABLE, &mErr); mContext = Context; mCounter = 0; } @@ -133,7 +125,7 @@ For hardware emulation (`hw_emu`), the kernel code is compiled into a hardware m Review the generated Timeline Trace report (`opencl_trace.csv`). ``` - vitis_analyzer filter2d.hw_emu.xclbin.run_summary + vitis_analyzer xrt.run_summary ``` >**NOTE:** The run directory contains a file named `xrt.ini`. This file contains runtime options that generate additional reports such as the Profile Summary report and Timeline Trace. @@ -146,7 +138,7 @@ Review the generated Timeline Trace report (`opencl_trace.csv`). ### Improve the Host Code for Concurrent Kernel Enqueuing -1. Edit the `src/host/host.cpp` host file to change line 75. You will change this line to declare the command queue as an _out-of-order_ command queue. +1. Edit the `src/host/host.cpp` host file to change line 73. You will change this line to declare the command queue as an _out-of-order_ command queue. 
Code before the change: ``` @@ -182,10 +174,10 @@ nk = Filter2DKernel:3 ``` make clean - make run MODE=hw_emu + make run TARGET=hw_emu ``` -2. View the new `xclbin.run_summary` in the Vitis analyzer. +2. View the new `xrt.run_summary` in the Vitis analyzer. You can now see that the application takes advantage of the three CUs, and that the kernel executions overlaps and executes in parallel, speeding up the overall application. ![missing image](./images/overlapping_kernels_vitis_2.JPG) diff --git a/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/Makefile b/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/Makefile index 23a6039521..37cb44ef8a 100644 --- a/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/Makefile +++ b/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/Makefile @@ -29,14 +29,13 @@ endif # v++ Compiler options VPP_COMMON_OPTS := -s -g -t $(TARGET) --platform $(DEVICE) -CFLAGS := -g -std=c++11 -I$(XILINX_XRT)/include -I${XILINX_VIVADO}/include -CFLAGS += -I./opencv -LFLAGS := -L$(XILINX_XRT)/lib -lxilinxopencl -lrt -fopenmp -Wl,--as-needed -Wl,-rpath,./opencv/opencv_gcc -L./opencv/opencv_gcc -lopencv_core -lopencv_highgui +CFLAGS := -g -std=c++11 -I$(XILINX_XRT)/include +LFLAGS := -L$(XILINX_XRT)/lib -lxilinxopencl -lrt -fopenmp -Wl,--as-needed NUMDEVICES := 1 # run time args -EXE_OPT := -x filter2d.${TARGET}.xclbin -i ./img/test.bmp -n 1 +EXE_OPT := -x filter2d.${TARGET}.xclbin -n 1 # primary build targets .PHONY: xclbin host all @@ -47,7 +46,7 @@ host: $(HOST_EXE) all: xclbin host clean: - -$(RM) $(EMCONFIG_FILE) $(HOST_EXE) $(XCLBIN) $(KERNEL_XO) + -$(RM) $(EMCONFIG_FILE) $(HOST_EXE) *.xclbin *.xo $(XCLBIN): $(KERNEL_XO) $(VPP) $(VPP_COMMON_OPTS) -l -o $@ $+ $(VPP_LINK_OPTS) diff --git a/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/description.json 
b/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/description.json index 49ecd54ed8..15b780f89d 100644 --- a/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/description.json +++ b/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/description.json @@ -3,7 +3,7 @@ "description": "05-using-multiple-cu", "flow": "vitis", - "platform_whitelist": ["u200"], + "platform_allowlist": ["u200"], "testinfo": { "jobs": [ diff --git a/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/link.cfg b/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/link.cfg index bee5783e0d..b0395ad50e 100644 --- a/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/link.cfg +++ b/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/link.cfg @@ -1,6 +1,8 @@ platform=xilinx_u200_gen3x16_xdma_1_202110_1 debug=1 +[profile] +data=all:all:all:all + [connectivity] nk=Filter2DKernel:1 - diff --git a/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/link3.cfg b/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/link3.cfg index dd530f26fd..563ac22123 100644 --- a/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/link3.cfg +++ b/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/link3.cfg @@ -1,3 +1,4 @@ +platform=xilinx_u200_gen3x16_xdma_1_202110_1 debug=1 [profile] diff --git a/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/src/host/host-final.cpp b/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/src/host/host-final.cpp index f9450d1b5f..3bc73d244a 100644 --- a/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/src/host/host-final.cpp +++ 
b/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/src/host/host-final.cpp @@ -5,11 +5,11 @@ #include #include #include +#include #include "logger.h" #include "cmdlineparser.h" #include "xclbin_helper.h" -#include "opencv2/opencv.hpp" #include "coefficients.h" #include "filter2d.h" @@ -17,8 +17,6 @@ using namespace sda; using namespace sda::utils; -static void IplImage2Raw(IplImage* img, uchar* y, int stride_y, uchar* u, int stride_u, uchar* v, int stride_v); -static void Raw2IplImage(uchar* y, int stride_y, uchar* u, int stride_u, uchar* v, int stride_v, IplImage* img); // ------------------------------------------------------------------------------------------- // An event callback function that prints the operations performed by the OpenCL runtime. @@ -73,7 +71,7 @@ class Filter2DDispatcher { { mKernel = clCreateKernel(Program, "Filter2DKernel", &mErr); //mQueue = clCreateCommandQueue(Context, Device, CL_QUEUE_PROFILING_ENABLE, &mErr); - mQueue = clCreateCommandQueue(Context, Device, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &mErr); + mQueue = clCreateCommandQueue(Context, Device, CL_QUEUE_PROFILING_ENABLE|CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &mErr); mContext = Context; mCounter = 0; } @@ -140,6 +138,9 @@ class Filter2DDispatcher { }; +using uchar = unsigned char; + + int main(int argc, char** argv) { @@ -153,21 +154,18 @@ int main(int argc, char** argv) CmdLineParser parser; parser.addSwitch("--nruns", "-n", "Number of times to image is processed", "1"); parser.addSwitch("--fpga", "-x", "FPGA binary (xclbin) file to use", "xclbin/fpga.hw.xilinx_aws-vu9p-f1_4ddr-xpr-2pr_4_0.awsxclbin"); - parser.addSwitch("--input", "-i", "Input image file"); parser.addSwitch("--filter", "-f", "Filter type (0-3)", "0"); + parser.addSwitch("--width", "-w", "Image width", "64"); + parser.addSwitch("--height", "-h", "Image height", "256"); //parse all command line options parser.parse(argc, argv); - string inputImage = 
parser.value("input"); string fpgaBinary = parser.value("fpga"); + unsigned int width = parser.value_to_int("width"); + unsigned int height = parser.value_to_int("height"); int numRuns = parser.value_to_int("nruns"); int coeffs = parser.value_to_int("filter"); - if (inputImage.size() == 0) { - std::cout << std::endl; - std::cout << "ERROR: input image file must be specified using -i command line switch" << std::endl; - exit(1); - } if ((coeffs<0) || (coeffs>3)) { std::cout << std::endl; std::cout << "ERROR: Supported filter type values are [0:3]" << std::endl; @@ -176,7 +174,6 @@ int main(int argc, char** argv) std::cout << std::endl; std::cout << "FPGA binary : " << fpgaBinary << std::endl; - std::cout << "Input image : " << inputImage << std::endl; std::cout << "Number of runs : " << numRuns << std::endl; std::cout << "Filter type : " << coeffs << std::endl; std::cout << std::endl; @@ -197,19 +194,7 @@ int main(int argc, char** argv) // Read input image and format inputs // --------------------------------------------------------------------------------- - // Create filenames for input and ouput images - std::string srcFileName = inputImage; - std::string dstFileName = inputImage.substr(0, inputImage.size()-4)+"_out.bmp"; - - // Read Input image - IplImage *src, *dst; - src = cvLoadImage(srcFileName.c_str()); //format is BGR - if(!src) { - std::cout << "ERROR: Loading image " << srcFileName << " failed" << std::endl; - exit(1); - } - unsigned width = src->width; - unsigned height = src->height; + unsigned stride = width; unsigned nbytes = (stride*height); @@ -223,12 +208,6 @@ int main(int argc, char** argv) std::vector> coeff(FILTER2D_KERNEL_V_SIZE*FILTER2D_KERNEL_V_SIZE); - // Create destination image - dst = cvCreateImage(cvSize(width, height), src->depth, src->nChannels); - - // Convert CV Image to AXI video data - IplImage2Raw(src, y_src.data(), stride, u_src.data(), stride, v_src.data(), stride); - // Copy coefficients to 4k aligned vector 
memcpy(coeff.data() , &filterCoeffs[coeffs][0][0], coeff.size()*sizeof(short) ); @@ -266,17 +245,6 @@ int main(int argc, char** argv) auto fpga_end = std::chrono::high_resolution_clock::now(); - // --------------------------------------------------------------------------------- - // Format output and write image out - // --------------------------------------------------------------------------------- - - // Convert processed image back to CV Image - Raw2IplImage(y_dst.data(), stride, u_dst.data(), stride, v_dst.data(), stride, dst); - - // Convert image to cvMat and write it to disk - cvConvert( dst, cvCreateMat(height, width, CV_32FC3 ) ); - cvSaveImage(dstFileName.c_str(), dst); - // --------------------------------------------------------------------------------- // Compute reference results and compare @@ -303,11 +271,6 @@ int main(int argc, char** argv) auto cpu_end = std::chrono::high_resolution_clock::now(); - std::string refFileName = inputImage.substr(0, inputImage.size()-4)+"_ref.bmp"; - Raw2IplImage(y_ref.data(), stride, u_ref.data(), stride, v_ref.data(), stride, dst); - cvConvert( dst, cvCreateMat(height, width, CV_32FC3 ) ); - cvSaveImage(refFileName.c_str(), dst); - // Compare results bool diff = false; for (int y = 0; y < height; y++) { @@ -347,44 +310,9 @@ int main(int argc, char** argv) } // Release allocated memory - cvReleaseImage(&src); - cvReleaseImage(&dst); clReleaseProgram(program); clReleaseContext(context); clReleaseDevice(device); return (diff?1:0); } - - -static void IplImage2Raw(IplImage* img, uchar* y_buf, int stride_y, uchar* u_buf, int stride_u, uchar* v_buf, int stride_v) -{ - // Assumes RGB or YUV 4:4:4 - for (int y = 0; y < img->height; y++) - { - for (int x = 0; x < img->width; x++) - { - CvScalar cv_pix = cvGet2D(img, y, x); - y_buf[y*stride_y+x] = (uchar)cv_pix.val[0]; - u_buf[y*stride_u+x] = (uchar)cv_pix.val[1]; - v_buf[y*stride_v+x] = (uchar)cv_pix.val[2]; - } - } -} - -static void Raw2IplImage(uchar* y_buf, int stride_y, 
uchar* u_buf, int stride_u, uchar* v_buf, int stride_v, IplImage* img ) -{ - // Assumes RGB or YUV 4:4:4 - for (int y = 0; y < img->height; y++) - { - for (int x = 0; x < img->width; x++) - { - CvScalar cv_pix; - cv_pix.val[0] = y_buf[y*stride_y+x]; - cv_pix.val[1] = u_buf[y*stride_u+x]; - cv_pix.val[2] = v_buf[y*stride_v+x]; - cvSet2D(img, y, x, cv_pix); - } - } -} - diff --git a/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/src/host/host.cpp b/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/src/host/host.cpp index 50a06b4a1c..bffffe8f60 100644 --- a/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/src/host/host.cpp +++ b/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/src/host/host.cpp @@ -5,11 +5,11 @@ #include #include #include +#include #include "logger.h" #include "cmdlineparser.h" #include "xclbin_helper.h" -#include "opencv2/opencv.hpp" #include "coefficients.h" #include "filter2d.h" @@ -17,8 +17,6 @@ using namespace sda; using namespace sda::utils; -static void IplImage2Raw(IplImage* img, uchar* y, int stride_y, uchar* u, int stride_u, uchar* v, int stride_v); -static void Raw2IplImage(uchar* y, int stride_y, uchar* u, int stride_u, uchar* v, int stride_v, IplImage* img); // ------------------------------------------------------------------------------------------- // An event callback function that prints the operations performed by the OpenCL runtime. 
@@ -139,6 +137,9 @@ class Filter2DDispatcher { }; +using uchar = unsigned char; + + int main(int argc, char** argv) { @@ -152,21 +153,18 @@ int main(int argc, char** argv) CmdLineParser parser; parser.addSwitch("--nruns", "-n", "Number of times to image is processed", "1"); parser.addSwitch("--fpga", "-x", "FPGA binary (xclbin) file to use", "xclbin/fpga.hw.xilinx_aws-vu9p-f1_4ddr-xpr-2pr_4_0.awsxclbin"); - parser.addSwitch("--input", "-i", "Input image file"); parser.addSwitch("--filter", "-f", "Filter type (0-3)", "0"); + parser.addSwitch("--width", "-w", "Image width", "64"); + parser.addSwitch("--height", "-h", "Image height", "256"); //parse all command line options parser.parse(argc, argv); - string inputImage = parser.value("input"); string fpgaBinary = parser.value("fpga"); + unsigned int width = parser.value_to_int("width"); + unsigned int height = parser.value_to_int("height"); int numRuns = parser.value_to_int("nruns"); int coeffs = parser.value_to_int("filter"); - if (inputImage.size() == 0) { - std::cout << std::endl; - std::cout << "ERROR: input image file must be specified using -i command line switch" << std::endl; - exit(1); - } if ((coeffs<0) || (coeffs>3)) { std::cout << std::endl; std::cout << "ERROR: Supported filter type values are [0:3]" << std::endl; @@ -175,7 +173,6 @@ int main(int argc, char** argv) std::cout << std::endl; std::cout << "FPGA binary : " << fpgaBinary << std::endl; - std::cout << "Input image : " << inputImage << std::endl; std::cout << "Number of runs : " << numRuns << std::endl; std::cout << "Filter type : " << coeffs << std::endl; std::cout << std::endl; @@ -196,19 +193,7 @@ int main(int argc, char** argv) // Read input image and format inputs // --------------------------------------------------------------------------------- - // Create filenames for input and ouput images - std::string srcFileName = inputImage; - std::string dstFileName = inputImage.substr(0, inputImage.size()-4)+"_out.bmp"; - - // Read Input image - 
IplImage *src, *dst; - src = cvLoadImage(srcFileName.c_str()); //format is BGR - if(!src) { - std::cout << "ERROR: Loading image " << srcFileName << " failed" << std::endl; - exit(1); - } - unsigned width = src->width; - unsigned height = src->height; + unsigned stride = width; unsigned nbytes = (stride*height); @@ -222,12 +207,6 @@ int main(int argc, char** argv) std::vector> coeff(FILTER2D_KERNEL_V_SIZE*FILTER2D_KERNEL_V_SIZE); - // Create destination image - dst = cvCreateImage(cvSize(width, height), src->depth, src->nChannels); - - // Convert CV Image to AXI video data - IplImage2Raw(src, y_src.data(), stride, u_src.data(), stride, v_src.data(), stride); - // Copy coefficients to 4k aligned vector memcpy(coeff.data() , &filterCoeffs[coeffs][0][0], coeff.size()*sizeof(short) ); @@ -265,17 +244,6 @@ int main(int argc, char** argv) auto fpga_end = std::chrono::high_resolution_clock::now(); - // --------------------------------------------------------------------------------- - // Format output and write image out - // --------------------------------------------------------------------------------- - - // Convert processed image back to CV Image - Raw2IplImage(y_dst.data(), stride, u_dst.data(), stride, v_dst.data(), stride, dst); - - // Convert image to cvMat and write it to disk - cvConvert( dst, cvCreateMat(height, width, CV_32FC3 ) ); - cvSaveImage(dstFileName.c_str(), dst); - // --------------------------------------------------------------------------------- // Compute reference results and compare @@ -302,11 +270,6 @@ int main(int argc, char** argv) auto cpu_end = std::chrono::high_resolution_clock::now(); - std::string refFileName = inputImage.substr(0, inputImage.size()-4)+"_ref.bmp"; - Raw2IplImage(y_ref.data(), stride, u_ref.data(), stride, v_ref.data(), stride, dst); - cvConvert( dst, cvCreateMat(height, width, CV_32FC3 ) ); - cvSaveImage(refFileName.c_str(), dst); - // Compare results bool diff = false; for (int y = 0; y < height; y++) { @@ -346,44 
+309,9 @@ int main(int argc, char** argv) } // Release allocated memory - cvReleaseImage(&src); - cvReleaseImage(&dst); clReleaseProgram(program); clReleaseContext(context); clReleaseDevice(device); return (diff?1:0); } - - -static void IplImage2Raw(IplImage* img, uchar* y_buf, int stride_y, uchar* u_buf, int stride_u, uchar* v_buf, int stride_v) -{ - // Assumes RGB or YUV 4:4:4 - for (int y = 0; y < img->height; y++) - { - for (int x = 0; x < img->width; x++) - { - CvScalar cv_pix = cvGet2D(img, y, x); - y_buf[y*stride_y+x] = (uchar)cv_pix.val[0]; - u_buf[y*stride_u+x] = (uchar)cv_pix.val[1]; - v_buf[y*stride_v+x] = (uchar)cv_pix.val[2]; - } - } -} - -static void Raw2IplImage(uchar* y_buf, int stride_y, uchar* u_buf, int stride_u, uchar* v_buf, int stride_v, IplImage* img ) -{ - // Assumes RGB or YUV 4:4:4 - for (int y = 0; y < img->height; y++) - { - for (int x = 0; x < img->width; x++) - { - CvScalar cv_pix; - cv_pix.val[0] = y_buf[y*stride_y+x]; - cv_pix.val[1] = u_buf[y*stride_u+x]; - cv_pix.val[2] = v_buf[y*stride_v+x]; - cvSet2D(img, y, x, cv_pix); - } - } -} - diff --git a/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/src/host/host_opencv.cpp b/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/src/host/host_opencv.cpp new file mode 100644 index 0000000000..aeebf5576b --- /dev/null +++ b/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/src/host/host_opencv.cpp @@ -0,0 +1,388 @@ +#include +#include +#include +#include +#include +#include +#include +#include "logger.h" +#include "cmdlineparser.h" + +#include "xclbin_helper.h" +#include "opencv2/opencv.hpp" + +#include "coefficients.h" +#include "filter2d.h" + +using namespace sda; +using namespace sda::utils; + +static void IplImage2Raw(IplImage* img, uchar* y, int stride_y, uchar* u, int stride_u, uchar* v, int stride_v); +static void Raw2IplImage(uchar* y, int stride_y, uchar* u, int stride_u, uchar* v, int stride_v, 
IplImage* img); + +// ------------------------------------------------------------------------------------------- +// An event callback function that prints the operations performed by the OpenCL runtime. +// ------------------------------------------------------------------------------------------- +void event_cb(cl_event event, cl_int cmd_status, void *id) +{ + if (getenv("XCL_EMULATION_MODE") != NULL) { + std::cout << " kernel finished processing request " << *(int *)id << std::endl; + } +} + +// ------------------------------------------------------------------------------------------- +// Struct returned by BlurDispatcher() and used to keep track of the request sent to the kernel +// The sync() method waits for completion of the request. After it returns, results are ready +// ------------------------------------------------------------------------------------------- +struct Filter2DRequest { + + cl_event mEvent[3]; + int mId; + + Filter2DRequest(int id) { + mId = id; + } + + void sync() + { + // Wait until the outputs have been read back + clWaitForEvents(1, &mEvent[2]); + clReleaseEvent(mEvent[0]); + clReleaseEvent(mEvent[1]); + clReleaseEvent(mEvent[2]); + } + +}; + + +// ------------------------------------------------------------------------------------------- +// Class used to dispatch requests to the kernel +// The BlurDispatcher() method schedules the necessary operations (write, kernel, read) and +// returns a BlurRequest* struct which can be used to track the completion of the request. +// The dispatcher has its own OOO command queue allowing multiple requests to be scheduled +// and executed independently by the OpenCL runtime. 
+// ------------------------------------------------------------------------------------------- +class Filter2DDispatcher { + +public: + + Filter2DDispatcher( + cl_device_id &Device, + cl_context &Context, + cl_program &Program ) + { + mKernel = clCreateKernel(Program, "Filter2DKernel", &mErr); + mQueue = clCreateCommandQueue(Context, Device, CL_QUEUE_PROFILING_ENABLE, &mErr); + mContext = Context; + mCounter = 0; + } + + Filter2DRequest* operator() ( + short *coeffs, + unsigned char *src, + unsigned int width, + unsigned int height, + unsigned int stride, + unsigned char *dst ) + { + + Filter2DRequest* req = new Filter2DRequest(mCounter++); + + unsigned nbytes = (stride*height); + + // Create input buffers for coefficients (host to device) + mSrcBuf[0] = clCreateBuffer(mContext, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, (FILTER2D_KERNEL_V_SIZE*FILTER2D_KERNEL_V_SIZE)*sizeof(short), coeffs, &mErr); + + // Create input buffer for src (host to device) + mSrcBuf[1] = clCreateBuffer(mContext, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, nbytes, src, &mErr); + + // Create output buffer for dst (device to host) + mDstBuf[0] = clCreateBuffer(mContext,CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY, nbytes, dst, &mErr); + + // Set the kernel arguments + clSetKernelArg(mKernel, 0, sizeof(cl_mem), &mSrcBuf[0]); + clSetKernelArg(mKernel, 1, sizeof(cl_mem), &mSrcBuf[1]); + clSetKernelArg(mKernel, 2, sizeof(unsigned int), &width); + clSetKernelArg(mKernel, 3, sizeof(unsigned int), &height); + clSetKernelArg(mKernel, 4, sizeof(unsigned int), &stride); + clSetKernelArg(mKernel, 5, sizeof(cl_mem), &mDstBuf[0]); + + // Schedule the writing of the inputs + clEnqueueMigrateMemObjects(mQueue, 2, mSrcBuf, 0, 0, nullptr, &req->mEvent[0]); + + // Schedule the execution of the kernel + clEnqueueTask(mQueue, mKernel, 1, &req->mEvent[0], &req->mEvent[1]); + + // Schedule the reading of the outputs + clEnqueueMigrateMemObjects(mQueue, 1, mDstBuf, CL_MIGRATE_MEM_OBJECT_HOST, 1, &req->mEvent[1], 
&req->mEvent[2]); + + // Register call back to notify of kernel completion + clSetEventCallback(req->mEvent[1], CL_COMPLETE, event_cb, &req->mId); + + return req; + }; + + ~Filter2DDispatcher() + { + clReleaseCommandQueue(mQueue); + clReleaseKernel(mKernel); + }; + +private: + cl_kernel mKernel; + cl_command_queue mQueue; + cl_context mContext; + cl_mem mSrcBuf[2]; + cl_mem mDstBuf[1]; + cl_int mErr; + int mCounter; +}; + + + +int main(int argc, char** argv) +{ + std::cout << std::endl; + std::cout << "Xilinx 2D Filter Example Application\n"; + + // --------------------------------------------------------------------------------- + // Parse command line + // --------------------------------------------------------------------------------- + + CmdLineParser parser; + parser.addSwitch("--nruns", "-n", "Number of times to image is processed", "1"); + parser.addSwitch("--fpga", "-x", "FPGA binary (xclbin) file to use", "xclbin/fpga.hw.xilinx_aws-vu9p-f1_4ddr-xpr-2pr_4_0.awsxclbin"); + parser.addSwitch("--input", "-i", "Input image file"); + parser.addSwitch("--filter", "-f", "Filter type (0-3)", "0"); + + //parse all command line options + parser.parse(argc, argv); + string inputImage = parser.value("input"); + string fpgaBinary = parser.value("fpga"); + int numRuns = parser.value_to_int("nruns"); + int coeffs = parser.value_to_int("filter"); + + if (inputImage.size() == 0) { + std::cout << std::endl; + std::cout << "ERROR: input image file must be specified using -i command line switch" << std::endl; + exit(1); + } + if ((coeffs<0) || (coeffs>3)) { + std::cout << std::endl; + std::cout << "ERROR: Supported filter type values are [0:3]" << std::endl; + exit(1); + } + + std::cout << std::endl; + std::cout << "FPGA binary : " << fpgaBinary << std::endl; + std::cout << "Input image : " << inputImage << std::endl; + std::cout << "Number of runs : " << numRuns << std::endl; + std::cout << "Filter type : " << coeffs << std::endl; + std::cout << std::endl; + + + // 
--------------------------------------------------------------------------------- + // Load XCLBIN file, create OpenCL context, device and program + // --------------------------------------------------------------------------------- + + std::cout << "Programming FPGA" << std::endl; + cl_context context; + cl_program program; + cl_device_id device; + load_xclbin_file(fpgaBinary.c_str(), context, device, program); + + + // --------------------------------------------------------------------------------- + // Read input image and format inputs + // --------------------------------------------------------------------------------- + + // Create filenames for input and ouput images + std::string srcFileName = inputImage; + std::string dstFileName = inputImage.substr(0, inputImage.size()-4)+"_out.bmp"; + + // Read Input image + IplImage *src, *dst; + src = cvLoadImage(srcFileName.c_str()); //format is BGR + if(!src) { + std::cout << "ERROR: Loading image " << srcFileName << " failed" << std::endl; + exit(1); + } + unsigned width = src->width; + unsigned height = src->height; + unsigned stride = width; + unsigned nbytes = (stride*height); + + // 4k aligned buffers for efficient data transfer to the kernel + std::vector> y_src(nbytes); + std::vector> u_src(nbytes); + std::vector> v_src(nbytes); + std::vector> y_dst(nbytes); + std::vector> u_dst(nbytes); + std::vector> v_dst(nbytes); + std::vector> coeff(FILTER2D_KERNEL_V_SIZE*FILTER2D_KERNEL_V_SIZE); + + + // Create destination image + dst = cvCreateImage(cvSize(width, height), src->depth, src->nChannels); + + // Convert CV Image to AXI video data + IplImage2Raw(src, y_src.data(), stride, u_src.data(), stride, v_src.data(), stride); + + // Copy coefficients to 4k aligned vector + memcpy(coeff.data() , &filterCoeffs[coeffs][0][0], coeff.size()*sizeof(short) ); + + // --------------------------------------------------------------------------------- + // Make requests to kernel(s) + // 
--------------------------------------------------------------------------------- + // Note: change the number of kernels in the device, or reorder the sync() methods + // to see the impact on performance and how requests are scheduled. + // --------------------------------------------------------------------------------- + + std::cout << std::endl; + std::cout << "Running FPGA version" << std::endl; + + // Create a dispatcher of requests to the Blur kernel(s) + Filter2DDispatcher Filter(device, context, program); + + auto fpga_begin = std::chrono::high_resolution_clock::now(); + + Filter2DRequest* request[numRuns*3]; + for(int xx=0; xxsync(); + request[xx*3+1]->sync(); + request[xx*3+2]->sync(); + } + + + auto fpga_end = std::chrono::high_resolution_clock::now(); + + // --------------------------------------------------------------------------------- + // Format output and write image out + // --------------------------------------------------------------------------------- + + // Convert processed image back to CV Image + Raw2IplImage(y_dst.data(), stride, u_dst.data(), stride, v_dst.data(), stride, dst); + + // Convert image to cvMat and write it to disk + cvConvert( dst, cvCreateMat(height, width, CV_32FC3 ) ); + cvSaveImage(dstFileName.c_str(), dst); + + + // --------------------------------------------------------------------------------- + // Compute reference results and compare + // --------------------------------------------------------------------------------- + + std::cout << std::endl; + std::cout << "Running Software version" << std::endl; + + // Create output buffers for reference results + std::vector> y_ref(nbytes); + std::vector> u_ref(nbytes); + std::vector> v_ref(nbytes); + + auto cpu_begin = std::chrono::high_resolution_clock::now(); + + #pragma omp parallel for + for(int xx=0; xx fpga_duration = fpga_end - fpga_begin; + std::cout << "FPGA Time: " << fpga_duration.count() << " s" << std::endl; + std::cout << "FPGA Throughput: " + << (double) 
numRuns*3*nbytes / fpga_duration.count() / (1024.0*1024.0) + << " MB/s" << std::endl; + + std::chrono::duration cpu_duration = cpu_end - cpu_begin; + std::cout << "CPU Time: " << cpu_duration.count() << " s" << std::endl; + std::cout << "CPU Throughput: " + << (double) numRuns*3*nbytes / cpu_duration.count() / (1024.0*1024.0) + << " MB/s" << std::endl; + + std::cout << "FPGA Speedup: " << cpu_duration.count() / fpga_duration.count() << " x" << std::endl; + } + + // Release allocated memory + cvReleaseImage(&src); + cvReleaseImage(&dst); + clReleaseProgram(program); + clReleaseContext(context); + clReleaseDevice(device); + + return (diff?1:0); +} + + +static void IplImage2Raw(IplImage* img, uchar* y_buf, int stride_y, uchar* u_buf, int stride_u, uchar* v_buf, int stride_v) +{ + // Assumes RGB or YUV 4:4:4 + for (int y = 0; y < img->height; y++) + { + for (int x = 0; x < img->width; x++) + { + CvScalar cv_pix = cvGet2D(img, y, x); + y_buf[y*stride_y+x] = (uchar)cv_pix.val[0]; + u_buf[y*stride_u+x] = (uchar)cv_pix.val[1]; + v_buf[y*stride_v+x] = (uchar)cv_pix.val[2]; + } + } +} + +static void Raw2IplImage(uchar* y_buf, int stride_y, uchar* u_buf, int stride_u, uchar* v_buf, int stride_v, IplImage* img ) +{ + // Assumes RGB or YUV 4:4:4 + for (int y = 0; y < img->height; y++) + { + for (int x = 0; x < img->width; x++) + { + CvScalar cv_pix; + cv_pix.val[0] = y_buf[y*stride_y+x]; + cv_pix.val[1] = u_buf[y*stride_u+x]; + cv_pix.val[2] = v_buf[y*stride_v+x]; + cvSet2D(img, y, x, cv_pix); + } + } +} diff --git a/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/xrt.ini b/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/xrt.ini index e1523168c2..a924f3ffec 100644 --- a/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/xrt.ini +++ b/Hardware_Acceleration/Feature_Tutorials/05-using-multiple-cu/reference-files/xrt.ini @@ -1,3 +1,3 @@ [Debug] -opencl_summary=true opencl_trace=true 
+data_transfer_trace=accel diff --git a/Hardware_Acceleration/Feature_Tutorials/06-controlling-vivado-implementation/reference-files/Makefile b/Hardware_Acceleration/Feature_Tutorials/06-controlling-vivado-implementation/reference-files/Makefile index 03640e2ca1..217b16045e 100644 --- a/Hardware_Acceleration/Feature_Tutorials/06-controlling-vivado-implementation/reference-files/Makefile +++ b/Hardware_Acceleration/Feature_Tutorials/06-controlling-vivado-implementation/reference-files/Makefile @@ -19,13 +19,13 @@ help: @echo "" ####################################################################################### TARGET := hw -PLATFORM := xilinx_u200_gen3x16_xdma_1_202110_1 +PLATFORM := xilinx_u200_gen3x16_xdma_2_202110_1 HOST_EXE := host XO := apply_watermark.$(TARGET).$(PLATFORM).xo XCLBIN := apply_watermark.$(TARGET).$(PLATFORM).xclbin # Host building global settings -CXXFLAGS := -I$(XILINX_XRT)/include/ -I$(XILINX_VIVADO)/include/ -Wall -O0 -g -std=c++11 -L$(XILINX_XRT)/lib/ -lpthread -lrt -lstdc++ +CXXFLAGS := -I$(XILINX_XRT)/include/ -I$(XILINX_VIVADO)/include/ -Wall -O0 -g -std=c++11 -L$(XILINX_XRT)/lib/ -pthread -lrt -lstdc++ CXXFLAGS2 := -lOpenCL # Kernel compiler & linker global settings diff --git a/Hardware_Acceleration/Feature_Tutorials/06-controlling-vivado-implementation/reference-files/description.json b/Hardware_Acceleration/Feature_Tutorials/06-controlling-vivado-implementation/reference-files/description.json index de7af6bd4d..d40645e499 100644 --- a/Hardware_Acceleration/Feature_Tutorials/06-controlling-vivado-implementation/reference-files/description.json +++ b/Hardware_Acceleration/Feature_Tutorials/06-controlling-vivado-implementation/reference-files/description.json @@ -3,7 +3,7 @@ "description": "06-controlling-vivado-implementation", "flow": "vitis", - "platform_whitelist": ["u200"], + "platform_allowlist": ["u200"], "testinfo": { "jobs": [ diff --git a/Hardware_Acceleration/Feature_Tutorials/07-using-hbm/makefile/Makefile 
b/Hardware_Acceleration/Feature_Tutorials/07-using-hbm/makefile/Makefile index b8ade5982d..7ac4ee16d7 100644 --- a/Hardware_Acceleration/Feature_Tutorials/07-using-hbm/makefile/Makefile +++ b/Hardware_Acceleration/Feature_Tutorials/07-using-hbm/makefile/Makefile @@ -15,7 +15,7 @@ endif include mem_connectivity.mk ifeq ($(memtype), DDR) - PLATFORM :=xilinx_u200_gen3x16_xdma_1_202110_1 + PLATFORM :=xilinx_u200_gen3x16_xdma_2_202110_1 else PLATFORM :=xilinx_u50_gen3x16_xdma_201920_3 endif diff --git a/Hardware_Acceleration/Feature_Tutorials/07-using-hbm/makefile/common.mk b/Hardware_Acceleration/Feature_Tutorials/07-using-hbm/makefile/common.mk index e291944cf3..ced3471d28 100644 --- a/Hardware_Acceleration/Feature_Tutorials/07-using-hbm/makefile/common.mk +++ b/Hardware_Acceleration/Feature_Tutorials/07-using-hbm/makefile/common.mk @@ -41,7 +41,7 @@ $(builddir)/host: $(HOST_SRC_CPP) -O3 -Wall -fmessage-length=0 -std=c++11\ $(HOST_SRC_CPP) \ -L$(XILINX_XRT)/lib/ \ - -lxilinxopencl -lpthread -lrt \ + -lxilinxopencl -pthread -lrt \ -o $(builddir)/host $(builddir)/$(EMCONFIG_FILE): diff --git a/Hardware_Acceleration/Feature_Tutorials/07-using-hbm/makefile/description.json b/Hardware_Acceleration/Feature_Tutorials/07-using-hbm/makefile/description.json index 03cc5f6cb4..b31d740886 100644 --- a/Hardware_Acceleration/Feature_Tutorials/07-using-hbm/makefile/description.json +++ b/Hardware_Acceleration/Feature_Tutorials/07-using-hbm/makefile/description.json @@ -3,7 +3,7 @@ "description": "07-using-hbm", "flow": "vitis", - "platform_whitelist": ["u200", "U50"], + "platform_allowlist": ["u200", "U50"], "testinfo": { "disable": 0, diff --git a/Hardware_Acceleration/Feature_Tutorials/08-using-hostmem/reference-files/Makefile b/Hardware_Acceleration/Feature_Tutorials/08-using-hostmem/reference-files/Makefile index b69ade3e04..6d4cfe5c05 100644 --- a/Hardware_Acceleration/Feature_Tutorials/08-using-hostmem/reference-files/Makefile +++ 
b/Hardware_Acceleration/Feature_Tutorials/08-using-hostmem/reference-files/Makefile @@ -28,7 +28,7 @@ endif CFLAGS := -g -std=c++14 -I$(XILINX_XRT)/include -LFLAGS := -L$(XILINX_XRT)/lib -lxilinxopencl -lpthread -lrt +LFLAGS := -L$(XILINX_XRT)/lib -lxilinxopencl -pthread -lrt #Run time arguments EXE_OPT := vadd.$(TARGET).$(LAB).xclbin diff --git a/Hardware_Acceleration/Feature_Tutorials/08-using-hostmem/reference-files/description.json b/Hardware_Acceleration/Feature_Tutorials/08-using-hostmem/reference-files/description.json index 1c7abbd29c..45050f56b9 100644 --- a/Hardware_Acceleration/Feature_Tutorials/08-using-hostmem/reference-files/description.json +++ b/Hardware_Acceleration/Feature_Tutorials/08-using-hostmem/reference-files/description.json @@ -3,7 +3,7 @@ "description": "08-using-hostmem", "flow": "vitis", - "platform_whitelist": ["u250"], + "platform_allowlist": ["u250"], "testinfo": { "jobs": [ diff --git a/Jenkinsfile b/Jenkinsfile index 8e9179d60b..c301e2767e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,10 +1,10 @@ @Library('pipeline-library')_ -VitisLibPipeline (branch: 'master', libname: 'Vitis-Tutorials', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build:vitis_aie_sim', +VitisLibPipeline (branch: 'master', libname: 'Vitis-Tutorials', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build', forcerdi:'true', devtest: 'RunDevTest.sh', additional_json:'add_tutorials.json', NEW_GENERATE:"true", disable_standardization: "true", allowed_modes:"sw_emu,hw_emu,hw", yml:'2022.1/tutorial.yml', tql:'2022.1/tutorial.tql', - email: 'alokgupt@xilinx.com', TOOLVERSION: '2022.1_daily_latest') + email: 'randyh@xilinx.com', select_case: '/group/xcoswmktg/randyh/sprite-test-list.txt', TOOLVERSION: '2022.1_daily_latest')

Vitis™ Application Acceleration Development Flow Tutorials

+

Vitis™ Application Acceleration Development Flow Tutorials