diff --git a/DirectProgramming/C++SYCL_FPGA/README.md b/DirectProgramming/C++SYCL_FPGA/README.md index 572f975c36..ce47ef9990 100644 --- a/DirectProgramming/C++SYCL_FPGA/README.md +++ b/DirectProgramming/C++SYCL_FPGA/README.md @@ -175,6 +175,7 @@ All the Tier 4 samples are in the [ReferenceDesigns](ReferenceDesigns) category. | [board_test](ReferenceDesigns/board_test) | How to test board interfaces to ensure the designed platform provides expected performance | [cholesky](ReferenceDesigns/cholesky) | How to implement high performance matrix Cholesky decomposition on a FPGA | [cholesky_inversion](ReferenceDesigns/cholesky_inversion) | How to implement high performance Cholesky matrix decomposition on a FPGA +| [convolution_2d](ReferenceDesigns/convolution2d) | How to implement a 2D convolution IP component that can be exported to Intel® Quartus® Prime | [crr](ReferenceDesigns/crr) | How to implement the Cox-Ross-Rubinstein (CRR) binomial tree model on a FPGA | [db](ReferenceDesigns/db) | How to accelerate database queries using an FPGA | [decompress](ReferenceDesigns/decompress) | How to implement an efficient GZIP and Snappy decompression engine on a FPGA diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/.gitignore b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/.gitignore new file mode 100644 index 0000000000..c262a7ceae --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/.gitignore @@ -0,0 +1,2 @@ +*build*/ +.vscode/* \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/CMakeLists.txt b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/CMakeLists.txt new file mode 100644 index 0000000000..b874d81eea --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/CMakeLists.txt @@ -0,0 +1,324 @@ +# Direct CMake to use icpx rather than the default C++ compiler/linker on Linux +# and icx-cl on Windows +if(UNIX) + set(CMAKE_CXX_COMPILER icpx) +else() # Windows + include (CMakeForceCompiler) + CMAKE_FORCE_CXX_COMPILER (icx-cl IntelDPCPP) + include (Platform/Windows-Clang) +endif() + +cmake_minimum_required (VERSION 3.7.2) + +project(convolution2d CXX) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +############################################################################### +### Customize these build variables +############################################################################### +set(SOURCE_FILES src/main.cpp) +set(TARGET_NAME convolution) + +# Use cmake -DFPGA_DEVICE=: to choose a +# different device. +# Note that depending on your installation, you may need to specify the full +# path to the board support package (BSP), this usually is in your install +# folder. +# +# You can also specify a device family (E.g. "Arria10" or "Stratix10") or a +# specific part number (E.g. "10AS066N3F40E2SG") to generate a standalone IP. +if(NOT DEFINED FPGA_DEVICE) + set(FPGA_DEVICE "Agilex7") +endif() + +# Use cmake -DUSER_FPGA_FLAGS= to set extra flags for FPGA backend +# compilation. +set(USER_FPGA_FLAGS ${USER_FPGA_FLAGS};-Xsclock=300MHz;-Xsoptimize=latency) + +if(NOT DEFINED PARALLEL_PIXELS) + SET(PARALLEL_PIXELS 2) +endif() + +if(NOT DEFINED PIXEL_BITS) + SET(PIXEL_BITS 10) +endif() + +if(NOT DEFINED WINDOW_SZ) + SET(WINDOW_SZ 3) +endif() + +if(NOT DEFINED MAX_COLS) + SET(MAX_COLS 4096) +endif() + +if(NOT DEFINED TEST_CONV2D_ISOLATED) + SET(TEST_CONV2D_ISOLATED 0) +endif() + +# Use cmake -DUSER_FLAGS= to set extra flags for general compilation. +set(USER_FLAGS ${USER_FLAGS};-DPARALLEL_PIXELS=${PARALLEL_PIXELS}; + -DPIXEL_BITS=${PIXEL_BITS}; + -DWINDOW_SZ=${WINDOW_SZ}; + -DMAX_COLS=${MAX_COLS} + -DTEST_CONV2D_ISOLATED=${TEST_CONV2D_ISOLATED};) + +# Use cmake -DUSER_INCLUDE_PATHS= to set extra paths for general +# compilation. +set(USER_INCLUDE_PATHS ./include;../../include;${USER_INCLUDE_PATHS}) + +############################################################################### +### no changes after here +############################################################################### + +# Print the device being used for the compiles +message(STATUS "Configuring the design to run on FPGA board ${FPGA_DEVICE}") + +# Set the names of the makefile targets to be generated by cmake +set(EMULATOR_TARGET fpga_emu) +set(SIMULATOR_TARGET fpga_sim) +set(REPORT_TARGET report) +set(FPGA_TARGET fpga) + +# Set the names of the generated files per makefile target +set(EMULATOR_OUTPUT_NAME ${TARGET_NAME}.${EMULATOR_TARGET}) +set(SIMULATOR_OUTPUT_NAME ${TARGET_NAME}.${SIMULATOR_TARGET}) +set(REPORT_OUTPUT_NAME ${TARGET_NAME}.${REPORT_TARGET}) +set(FPGA_OUTPUT_NAME ${TARGET_NAME}.${FPGA_TARGET}) + +message(STATUS "Additional USER_FPGA_FLAGS=${USER_FPGA_FLAGS}") +message(STATUS "Additional USER_FLAGS=${USER_FLAGS}") + +include_directories(${USER_INCLUDE_PATHS}) +message(STATUS "Additional USER_INCLUDE_PATHS=${USER_INCLUDE_PATHS}") + +link_directories(${USER_LIB_PATHS}) +message(STATUS "Additional USER_LIB_PATHS=${USER_LIB_PATHS}") + +link_libraries(${USER_LIBS}) +message(STATUS "Additional USER_LIBS=${USER_LIBS}") + +if(WIN32) + # add qactypes for Windows + set(QACTYPES "-Qactypes") + # This is a Windows-specific flag that enables exception handling in host code + set(WIN_FLAG "/EHsc") +else() + # add qactypes for Linux + set(QACTYPES "-qactypes") +endif() + +string(TOLOWER "${CMAKE_BUILD_TYPE}" LOWER_BUILD_TYPE) +if(LOWER_BUILD_TYPE MATCHES debug) +# Set debug flags + if(WIN32) + set(DEBUG_FLAGS /DEBUG /Od) + else() + set(DEBUG_FLAGS -g -O0) + endif() +else() + set(DEBUG_FLAGS "") +endif() + +set(COMMON_COMPILE_FLAGS -fsycl -fintelfpga -Wall ${WIN_FLAG} ${DEBUG_FLAGS} ${QACTYPES} ${USER_FLAGS}) +set(COMMON_LINK_FLAGS -fsycl -fintelfpga ${QACTYPES} ${USER_FLAGS}) + +# A SYCL ahead-of-time (AoT) compile processes the device code in two stages. +# 1. The "compile" stage compiles the device code to an intermediate +# representation (SPIR-V). +# 2. The "link" stage invokes the compiler's FPGA backend before linking. For +# this reason, FPGA backend flags must be passed as link flags in CMake. +set(EMULATOR_COMPILE_FLAGS -DFPGA_EMULATOR) +set(EMULATOR_LINK_FLAGS ) +set(REPORT_COMPILE_FLAGS -DFPGA_HARDWARE) +set(REPORT_LINK_FLAGS -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_FPGA_FLAGS} -fsycl-link=early) +set(SIMULATOR_COMPILE_FLAGS -Xssimulation -DFPGA_SIMULATOR) +set(SIMULATOR_LINK_FLAGS -Xssimulation -Xsghdl -Xstarget=${FPGA_DEVICE} ${USER_FPGA_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${SIMULATOR_OUTPUT_NAME}) +set(FPGA_COMPILE_FLAGS -DFPGA_HARDWARE) +set(FPGA_LINK_FLAGS -Xshardware -Xstarget=${FPGA_DEVICE} ${USER_FPGA_FLAGS} -reuse-exe=${CMAKE_BINARY_DIR}/${FPGA_OUTPUT_NAME}) + +############################################################################### +### FPGA Emulator +############################################################################### +add_executable(${EMULATOR_TARGET} ${SOURCE_FILES}) +target_compile_options(${EMULATOR_TARGET} PRIVATE ${COMMON_COMPILE_FLAGS}) +target_compile_options(${EMULATOR_TARGET} PRIVATE ${EMULATOR_COMPILE_FLAGS}) +target_link_libraries(${EMULATOR_TARGET} ${COMMON_LINK_FLAGS}) +target_link_libraries(${EMULATOR_TARGET} ${EMULATOR_LINK_FLAGS}) +set_target_properties(${EMULATOR_TARGET} PROPERTIES OUTPUT_NAME ${EMULATOR_OUTPUT_NAME}) + +############################################################################### +### FPGA Simulator +############################################################################### +add_executable(${SIMULATOR_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILES}) +target_compile_options(${SIMULATOR_TARGET} PRIVATE ${COMMON_COMPILE_FLAGS}) +target_compile_options(${SIMULATOR_TARGET} PRIVATE ${SIMULATOR_COMPILE_FLAGS}) +target_link_libraries(${SIMULATOR_TARGET} ${COMMON_LINK_FLAGS}) +target_link_libraries(${SIMULATOR_TARGET} ${SIMULATOR_LINK_FLAGS}) +set_target_properties(${SIMULATOR_TARGET} PROPERTIES OUTPUT_NAME ${SIMULATOR_OUTPUT_NAME}) + +############################################################################### +### Generate Report +############################################################################### +add_executable(${REPORT_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILES}) +target_compile_options(${REPORT_TARGET} PRIVATE ${COMMON_COMPILE_FLAGS}) +target_compile_options(${REPORT_TARGET} PRIVATE ${REPORT_COMPILE_FLAGS}) + +# The report target does not need the QACTYPES flag at link stage +set(MODIFIED_COMMON_LINK_FLAGS_REPORT ${COMMON_LINK_FLAGS}) +list(REMOVE_ITEM MODIFIED_COMMON_LINK_FLAGS_REPORT ${QACTYPES}) + +target_link_libraries(${REPORT_TARGET} ${MODIFIED_COMMON_LINK_FLAGS_REPORT}) +target_link_libraries(${REPORT_TARGET} ${REPORT_LINK_FLAGS}) +set_target_properties(${REPORT_TARGET} PROPERTIES OUTPUT_NAME ${REPORT_OUTPUT_NAME}) + +############################################################################### +### FPGA Hardware +############################################################################### +add_executable(${FPGA_TARGET} EXCLUDE_FROM_ALL ${SOURCE_FILES}) +target_compile_options(${FPGA_TARGET} PRIVATE ${COMMON_COMPILE_FLAGS}) +target_compile_options(${FPGA_TARGET} PRIVATE ${FPGA_COMPILE_FLAGS}) +target_link_libraries(${FPGA_TARGET} ${COMMON_LINK_FLAGS}) +target_link_libraries(${FPGA_TARGET} ${FPGA_LINK_FLAGS}) +set_target_properties(${FPGA_TARGET} PROPERTIES OUTPUT_NAME ${FPGA_OUTPUT_NAME}) + +############################################################################### +### This part only manipulates cmake variables to print the commands to the user +############################################################################### + +# set the correct object file extension depending on the target platform +if(WIN32) + set(OBJ_EXTENSION "obj") +else() + set(OBJ_EXTENSION "o") +endif() + +# Set the source file names in a string +set(SOURCE_FILE_NAME "${SOURCE_FILES}") + +function(getCompileCommands common_compile_flags special_compile_flags common_link_flags special_link_flags target output_name) + + set(file_names ${SOURCE_FILE_NAME}) + set(COMPILE_COMMAND ) + set(LINK_COMMAND ) + + foreach(source ${file_names}) + # Get the relative path to the source and object files + file(RELATIVE_PATH CURRENT_SOURCE_FILE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_LIST_DIR}/${source}) + file(RELATIVE_PATH OBJ_FILE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${target}.dir/${source}.${OBJ_EXTENSION}) + + # Creating a string that contains the compile command + # Start by the compiler invocation + set(COMPILE_COMMAND "${COMPILE_COMMAND}${CMAKE_CXX_COMPILER}") + + # Add all the potential includes + foreach(INCLUDE ${USER_INCLUDE_PATHS}) + if(NOT IS_ABSOLUTE ${INCLUDE}) + file(RELATIVE_PATH INCLUDE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_LIST_DIR}/${INCLUDE}) + endif() + set(COMPILE_COMMAND "${COMPILE_COMMAND} -I${INCLUDE}") + endforeach() + + # Add all the common compile flags + foreach(FLAG ${common_compile_flags}) + set(COMPILE_COMMAND "${COMPILE_COMMAND} ${FLAG}") + endforeach() + + # Add all the specific compile flags + foreach(FLAG ${special_compile_flags}) + set(COMPILE_COMMAND "${COMPILE_COMMAND} ${FLAG}") + endforeach() + + # Get the location of the object file + file(RELATIVE_PATH OBJ_FILE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${target}.dir/${source}.${OBJ_EXTENSION}) + + # Add the source file and the output file + set(COMPILE_COMMAND "${COMPILE_COMMAND} -c ${CURRENT_SOURCE_FILE} -o ${OBJ_FILE}\n") + endforeach() + + set(COMPILE_COMMAND "${COMPILE_COMMAND}" PARENT_SCOPE) + + # Creating a string that contains the link command + # Start by the compiler invocation + set(LINK_COMMAND "${LINK_COMMAND}${CMAKE_CXX_COMPILER}") + + # Add all the common link flags + foreach(FLAG ${common_link_flags}) + set(LINK_COMMAND "${LINK_COMMAND} ${FLAG}") + endforeach() + + # Add all the specific link flags + foreach(FLAG ${special_link_flags}) + set(LINK_COMMAND "${LINK_COMMAND} ${FLAG}") + endforeach() + + # Add the output file + set(LINK_COMMAND "${LINK_COMMAND} -o ${output_name}") + + foreach(source ${file_names}) + # Get the relative path to the source and object files + file(RELATIVE_PATH OBJ_FILE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${target}.dir/${source}.${OBJ_EXTENSION}) + + # Add the source file and the output file + set(LINK_COMMAND "${LINK_COMMAND} ${OBJ_FILE}") + endforeach() + + # Add all the potential library paths + foreach(LIB_PATH ${USER_LIB_PATHS}) + if(NOT IS_ABSOLUTE ${LIB_PATH}) + file(RELATIVE_PATH LIB_PATH ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_LIST_DIR}/${LIB_PATH}) + endif() + if(NOT WIN32) + set(LINK_COMMAND "${LINK_COMMAND} -L${LIB_PATH}") + else() + set(LINK_COMMAND "${LINK_COMMAND} -L${LIB_PATH} -Wl,-rpath,${LIB_PATH}") + endif() + endforeach() + + # Add all the potential includes + foreach(LIB ${USER_LIBS}) + set(LINK_COMMAND "${LINK_COMMAND} -l${LIB}") + endforeach() + + set(LINK_COMMAND "${LINK_COMMAND}" PARENT_SCOPE) + +endfunction() + +# Windows executable is going to have the .exe extension +if(WIN32) + set(EXECUTABLE_EXTENSION ".exe") +endif() + +# Display the compile instructions in the emulation flow +getCompileCommands("${COMMON_COMPILE_FLAGS}" "${EMULATOR_COMPILE_FLAGS}" "${COMMON_LINK_FLAGS}" "${EMULATOR_LINK_FLAGS}" "${EMULATOR_TARGET}" "${EMULATOR_OUTPUT_NAME}${EXECUTABLE_EXTENSION}") + +add_custom_target( displayEmulationCompileCommands + ${CMAKE_COMMAND} -E cmake_echo_color --cyan "" + COMMENT "To compile manually:\n${COMPILE_COMMAND}\nTo link manually:\n${LINK_COMMAND}") +add_dependencies(${EMULATOR_TARGET} displayEmulationCompileCommands) + +# Display the compile instructions in the simulation flow +getCompileCommands("${COMMON_COMPILE_FLAGS}" "${SIMULATOR_COMPILE_FLAGS}" "${COMMON_LINK_FLAGS}" "${SIMULATOR_LINK_FLAGS}" "${SIMULATOR_TARGET}" "${SIMULATOR_OUTPUT_NAME}${EXECUTABLE_EXTENSION}") + +add_custom_target( displaySimulationCompileCommands + ${CMAKE_COMMAND} -E cmake_echo_color --cyan "" + COMMENT "To compile manually:\n${COMPILE_COMMAND}\nTo link manually:\n${LINK_COMMAND}") +add_dependencies(${SIMULATOR_TARGET} displaySimulationCompileCommands) + +# Display the compile instructions in the report flow +getCompileCommands("${COMMON_COMPILE_FLAGS}" "${REPORT_COMPILE_FLAGS}" "${MODIFIED_COMMON_LINK_FLAGS_REPORT}" "${REPORT_LINK_FLAGS}" "${REPORT_TARGET}" "${REPORT_OUTPUT_NAME}${EXECUTABLE_EXTENSION}") + +add_custom_target( displayReportCompileCommands + ${CMAKE_COMMAND} -E cmake_echo_color --cyan "" + COMMENT "To compile manually:\n${COMPILE_COMMAND}\nTo link manually:\n${LINK_COMMAND}") +add_dependencies(${REPORT_TARGET} displayReportCompileCommands) + +# Display the compile instructions in the fpga flow +getCompileCommands("${COMMON_COMPILE_FLAGS}" "${FPGA_COMPILE_FLAGS}" "${COMMON_LINK_FLAGS}" "${FPGA_LINK_FLAGS}" "${FPGA_TARGET}" "${FPGA_OUTPUT_NAME}${EXECUTABLE_EXTENSION}") + +add_custom_target( displayFPGACompileCommands + ${CMAKE_COMMAND} -E cmake_echo_color --cyan "" + COMMENT "To compile manually:\n${COMPILE_COMMAND}\nTo link manually:\n${LINK_COMMAND}") +add_dependencies(${FPGA_TARGET} displayFPGACompileCommands) \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/README.md b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/README.md new file mode 100644 index 0000000000..6fb4229daa --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/README.md @@ -0,0 +1,523 @@ +# 2D Image Convolution + +This project demonstrates how to use SYCL HLS to create a 2D Convolution IP that can be used in your Intel® Quartus® Prime projects. + +## Purpose + +Convolution is a cornerstone problem in video processing designs. It commonly appears in both hardware and software systems, but in general, software implementations do not efficiently synthesize into RTL. This reference design includes: + +* A reusable C++ library that generates an efficient line buffer hardware module. This hardware module is the foundation of convolution algorithms, and other similar algorithms. + +* An example of how to chain together multiple video processing modules using `sycl::pipe`s, which behave like streaming interfaces in hardware designs. + +* A video-processing testbench that you can use to test your own designs. This testbench consumes image data from RAM and parcels it into streaming transactions. This makes it easy to stream a test image into your video design. + +## Prerequisites + +This sample is part of the FPGA code samples. +It is categorized as a Tier 4 sample that demonstrates a reference design. + +```mermaid +flowchart LR + tier1("Tier 1: Get Started") + tier2("Tier 2: Explore the Fundamentals") + tier3("Tier 3: Explore the Advanced Techniques") + tier4("Tier 4: Explore the Reference Designs") + + tier1 --> tier2 --> tier3 --> tier4 + + style tier1 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff + style tier2 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff + style tier3 fill:#0071c1,stroke:#0071c1,stroke-width:1px,color:#fff + style tier4 fill:#f96,stroke:#333,stroke-width:1px,color:#fff +``` + +Find more information about how to navigate this part of the code samples in the [FPGA top-level README.md](/DirectProgramming/C++SYCL_FPGA/README.md). +You can also find more information about [troubleshooting build errors](/DirectProgramming/C++SYCL_FPGA/README.md#troubleshooting), [running the sample on the Intel® DevCloud](/DirectProgramming/C++SYCL_FPGA/README.md#build-and-run-the-samples-on-intel-devcloud-optional), [using Visual Studio Code with the code samples](/DirectProgramming/C++SYCL_FPGA/README.md#use-visual-studio-code-vs-code-optional), [links to selected documentation](/DirectProgramming/C++SYCL_FPGA/README.md#documentation), etc. + +| Optimized for | Description +|:--- |:--- +| OS | Ubuntu* 20.04
RHEL*/CentOS* 8
SUSE* 15
Windows* 10
Windows Server* 2019 +| Hardware | Intel® Agilex® 7, Arria® 10, and Stratix® 10 FPGAs +| Software | Intel® oneAPI DPC++/C++ Compiler + +> **Note**: Even though the Intel DPC++/C++ OneAPI compiler is enough to compile for emulation, generating reports and generating RTL, there are extra software requirements for the simulation flow and FPGA compiles. +> +> For using the simulator flow, Intel® Quartus® Prime Pro Edition and one of the following simulators must be installed and accessible through your PATH: +> - Questa*-Intel® FPGA Edition +> - Questa*-Intel® FPGA Starter Edition +> - ModelSim® SE +> +> When using the hardware compile flow, Intel® Quartus® Prime Pro Edition must be installed and accessible through your PATH. +> +> :warning: Make sure you add the device files associated with the FPGA that you are targeting to your Intel® Quartus® Prime installation. + +### Performance + +Performance results are based on testing conducted with a pre-release version of oneAPI 2024.1, with released Intel® Quartus® Prime Pro Edition 23.3 software. Area and fMAX estimates are averaged across 8 seeds. Testing conducted January 22, 2024. These area estimates are ONLY for the `Convolution2d` kernel, and do not include the `RGB2Grey` or `Grey2RGB` kernels. You can compile the design with only the `Convolution2d` kernel by compiling with the `-DTEST_CONV2D_ISOLATED=1` compiler flag, or by adding `#define TEST_CONV2D_ISOLATED 1` in `src/main.cpp`. + +> **Note**: Refer to the [Performance Disclaimers](/DirectProgramming/C++SYCL_FPGA/README.md#performance-disclaimers) section for important performance information. + +#### Intel Agilex® 7 FPGA + +| Parallel Pixels | Window Dimensions | Coefficient Type | Input Type | fMAX (MHz) | ALMs | DSP blocks | M20K Block RAM +|--- |--- |--- |--- |--- |--- |--- |--- +| 1 | 3x3 | `float` | 10-bit Integer | 550.97 | 2705 | 9 | 18 +| 2 | 3x3 | `float` | 10-bit Integer | 548.65 | 4324 | 18 | 18 +| 4 | 3x3 | `float` | 10-bit Integer | 547.58 | 6938 | 36 | 17 +| 8 | 3x3 | `float` | 10-bit Integer | 525.12 | 14226 | 72 | 18 + +> **Note**: This design uses a relatively large number of ALM resources because of the floating-point conversions in `ConvolutionFunction()` in `src/convolution_kernel.hpp`. The coefficients for this design were specified as floating-point for maximal flexibility in coefficient values, but the enthusiastic user is encouraged to convert this function to fixed-point using the `ac_fixed` types, as described in [this sample](/DirectProgramming/C%2B%2BSYCL_FPGA/Tutorials/Features/ac_fixed). + +## Key Implementation Details + +### Line Buffer Hardware + +The `LineBuffer2d` class describes a line buffer data structure that compiles into efficient FPGA hardware. The user code inserts pixels into the line buffer data structure, which buffers the pixels so as to effectively slide a 2-D 'window' across the input data stream. A user-specified function operates on the 'window'. + +![](assets/animation_fir_filter.gif) + +The `LineBuffer2d` class lets you specify the number of concurrent pixels your design processes each transaction. This number affects the width and depth of the line buffer, and controls number of copies of your window function. Increasing the number of parallel pixels improves throughput, but also consumes more FPGA resources. The line buffer structure compiles into hardware like in the following figure: + +![](assets/FIR2d_pip2.svg) + +You can instantiate the `LineBuffer2d` in a oneAPI kernel like this: + +```c++ +#include "linebuffer2d.hpp" +<...> +template +struct Convolution2d { + // these defaults are not propagated to the RTL + int rows = 0; + int cols = 0; + + // These coefficients used in convolution. kernel are often called a 'kernel' + // in the image-processing field, but that term is avoided to reduce + // confusion. Since kernel are a kernel argument, kernel can be specified at + // runtime. The coefficients can only be updated by stopping the kernel and + // re-starting it. + conv2d::WeightType coeffs[conv2d::kWindowSize * conv2d::kWindowSize]; + + void operator()() const { + // This instance of the line buffer will store previously read pixels so + // that they can be operated on in a local filter. The filter is invoked + // below in the loop. + linebuffer2d::LineBuffer2d + myLineBuffer(rows, cols); + + bool keepGoing = true; + bool bypass = true; + + [[intel::initiation_interval(1)]] // + while (keepGoing) { + // do non-blocking reads so that the kernel can be interrupted at any + // time. + bool didReadBeat = false; + conv2d::GreyScaleBeat newBeat = PipeIn::read(didReadBeat); + + // the bypass signal lets the user disable the line buffer processing. + bool didReadBypass = false; + bool shouldBypass = BypassCSR::read(didReadBypass); + + // the stop signal lets the user instruct the kernel to halt so that new + // coefficients can be read. + bool didReadStop = false; + bool shouldStop = StopCSR::read(didReadStop); + + if (didReadBypass) { + bypass = shouldBypass; + } + + if (didReadBeat) { + conv2d::GreyScaleBeat outputBeat; + if (bypass) { + outputBeat = newBeat; + } else { + bool sop, eop; + + // Call `filter()` function on `LineBuffer2d` object. This inserts a + // new pixel into the linebuffer, and runs the user-provided window + // function (`ConvolutionFunction()`). The additional argument + // `coeffs` is passed to `ConvolutionFunction()`. The return value of + // `filter()` is the pixel data that we should propagate on to the + // next link in the processing chain. + conv2d::GreyPixelBundle outputBundle = + myLineBuffer.filter( + newBeat.data, newBeat.sop, newBeat.eop, sop, eop, coeffs); + outputBeat = conv2d::GreyScaleBeat(outputBundle, sop, eop, 0); + } + PipeOut::write(outputBeat); + } + + if (didReadStop) { + keepGoing = !shouldStop; + } + } + } +}; +``` + +The window function that you supply to `LineBuffer2d::filter<>()` should be of the following form: + +```c++ +OutputPixelType WindowFunction(short row, short col, short rows, + short cols, InputPixelType *buffer, + ... otherArgs) +``` + +`row`, `col`, `rows`, `cols`, and `buffer` are mandatory, but you can add additional arguments. For example, a runtime-defined set of coefficients. + +You can use this line buffer to implement a convolution operation by specifying a window function like this: + +```c++ +conv2d::PixelType ConvolutionFunction( + short row, short col, short rows, short cols, conv2d::PixelType *buffer, + const std::array + coefficients) { + float sum = 0.0f; +#pragma unroll + for (int w_row = 0; w_row < conv2d::kWindowSize; w_row++) { +#pragma unroll + for (int w_col = 0; w_col < conv2d::kWindowSize; w_col++) { + short c_select, r_select; + + // handle the case where the center of the window is at the image edge. + // In this design, simply 'reflect' pixels that are already in the window. + SaturateWindowCoordinates(w_row, w_col, + row, col, + rows, cols, + r_select, c_select); + conv2d::PixelType pixel = + buffer[c_select + r_select * conv2d::kWindowSize]; + + constexpr float kNormalizationFactor = (1 << conv2d::kBitsPerChannel); + + // converting `pixel` to a floating-point value uses lots of FPGA + // resources. If your expected coefficients have a narrow range, it will + // be worthwhile to convert these operations to fixed-point. + float normalized_pixel = (float)pixel / kNormalizationFactor; + + float normalized_coeff = + coefficients[w_col + w_row * conv2d::kWindowSize]; + + sum += normalized_pixel * normalized_coeff; + } + } + + // map range (-1.0, 1.0) to [0, 1<( + q, rows_small, cols_small, grey_pixels_in); + + // extra pixels to flush out the FIFO + int dummy_pixels = cols_small * conv2d::kWindowSize; + vvp_stream_adapters::WriteDummyPixelsToPipe( + q, dummy_pixels, (uint16_t)69); + + // disable bypass, since it's on by default + BypassCSR::write(q, false); + + sycl::event e = q.single_task( + Convolution2d{ + (int)rows_small, (int)cols_small, identity_coeffs}); + + conv2d::PixelType grey_pixels_out[pixels_count]; + bool sidebands_ok; + int defective_frames; + vvp_stream_adapters::ReadFrameFromPipe( + q, rows_small, cols_small, grey_pixels_out, sidebands_ok, + defective_frames); + + bool pixels_match = true; + for (int i = 0; i < pixels_count; i++) { + conv2d::PixelType grey_pixel_expected = <...> + pixels_match &= (grey_pixel_expected == grey_pixels_out[i]); + } + + // Stop the kernel in case testbench wants to run again with different kernel + // arguments. + StopCSR::write(q, true); + e.wait(); + + return sidebands_ok & pixels_match; +} +``` + +## Building the `convolution2d` Tutorial + +> **Note**: When working with the command-line interface (CLI), you should configure the oneAPI toolkits using environment variables. +> Set up your CLI environment by sourcing the `setvars` script located in the root of your oneAPI installation every time you open a new terminal window. +> This practice ensures that your compiler, libraries, and tools are ready for development. +> +> Linux*: +> - For system wide installations: `. /opt/intel/oneapi/setvars.sh` +> - For private installations: ` . ~/intel/oneapi/setvars.sh` +> - For non-POSIX shells, like csh, use the following command: `bash -c 'source /setvars.sh ; exec csh'` +> +> Windows*: +> - `C:\Program Files(x86)\Intel\oneAPI\setvars.bat` +> - Windows PowerShell*, use the following command: `cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'` +> +> For more information on configuring environment variables, see [Use the setvars Script with Linux* or macOS*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html) or [Use the setvars Script with Windows*](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-windows.html). + +Use these commands to run the design, depending on your OS. + +### On a Linux* System +This design uses CMake to generate a build script for GNU/make. + +1. Change to the sample directory. + +2. Configure the build system for the Arria 10 device family, which is the default. + + ``` + mkdir build + cd build + cmake .. + ``` + + > **Note**: You can change the default target by using the command: + > ``` + > cmake .. -DFPGA_DEVICE= + > ``` + +3. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: + + | Compilation Type | Command + |:--- |:--- + | FPGA Emulator | `make fpga_emu` + | Optimization Report | `make report` + | FPGA Simulator | `make fpga_sim` + | FPGA Hardware | `nmake fpga` + +### On a Windows* System +This design uses CMake to generate a build script for `nmake`. + +1. Change to the sample directory. + +2. Configure the build system for the Agilex® 7 device family, which is the default. + ``` + mkdir build + cd build + cmake -G "NMake Makefiles" .. + ``` + + > **Note**: You can change the default target by using the command: + > ``` + > cmake -G "NMake Makefiles" .. -DFPGA_DEVICE= + > ``` + +3. Compile the design through the generated `Makefile`. The following build targets are provided, matching the recommended development flow: + + | Compilation Type | Command (Windows) + |:--- |:--- + | FPGA Emulator | `nmake fpga_emu` + | Optimization Report | `nmake report` + | FPGA Simulator | `nmake fpga_sim` + | FPGA Hardware | `nmake fpga` + + > **Note**: If you encounter any issues with long paths when compiling under Windows*, you may have to create your ‘build’ directory in a shorter path, for example c:\samples\build. You can then run cmake from that directory, and provide cmake with the full path to your sample directory. + +## Run the `convolution2d` Executable + +### On Linux +1. Run the sample on the FPGA emulator (the kernel executes on the CPU). + ``` + ./convolution.fpga_emu + ``` +2. Run the sample on the FPGA simulator device. + ``` + CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 ./convolution.fpga_sim + ``` +### On Windows +1. Run the sample on the FPGA emulator (the kernel executes on the CPU). + ``` + convolution.fpga_emu.exe + ``` +2. Run the sample on the FPGA simulator device. + ``` + set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 + convolution.fpga_sim.exe + set CL_CONTEXT_MPSIM_DEVICE_INTELFPGA= + ``` + +## Example Output + +``` +Running on device: Intel(R) FPGA Emulation Device + +********************************** +Check a sequence of good frames... +********************************** + +INFO: Load image ../test_0.bmp +INFO: convert to vvp type. +INFO: Storing data to pipe with 2 pixels in parallel. +INFO: Load image ../test_1.bmp +INFO: convert to vvp type. +INFO: Storing data to pipe with 2 pixels in parallel. +INFO: Load image ../test_2.bmp +INFO: convert to vvp type. +INFO: Storing data to pipe with 2 pixels in parallel. +INFO: Load image ../test_3.bmp +INFO: convert to vvp type. +INFO: Storing data to pipe with 2 pixels in parallel. +INFO: Load image ../test_4.bmp +INFO: convert to vvp type. +INFO: Storing data to pipe with 2 pixels in parallel. +INFO: Storing dummy pixels to pipe with 2 pixels in parallel. +Info: Wrote 96 dummy streaming beats. +Launch kernels! + +********************* +Reading out frame 0 +Expect some unexpected start-of-packets as the kernel flushes its initial state. +INFO: Reading data from pipe with 2 pixels in parallel. +INFO: ReadFrameFromPipe(): [i = 0] - expect sop=TRUE eop=FALSE. saw sop=TRUE eop=TRUE. +INFO: Saw unexpected start of packet; reset counters. +INFO: ReadFrameFromPipe(): [i = 0] - expect sop=FALSE eop=FALSE. saw sop=TRUE eop=TRUE. + + + +INFO: ReadFrameFromPipe(): [i = 0] - expect sop=FALSE eop=FALSE. saw sop=TRUE eop=TRUE. +INFO: Saw unexpected start of packet; reset counters. +INFO: ReadFrameFromPipe(): [i = 0] - expect sop=FALSE eop=FALSE. saw sop=TRUE eop=FALSE. +INFO: convert to bmp type. +Wrote convolved image ./output_0.bmp +Compare with ../expected_sobel_0.bmp. +frame 0 passed + +********************* +Reading out frame 1 +INFO: Reading data from pipe with 2 pixels in parallel. +INFO: convert to bmp type. +Wrote convolved image ./output_1.bmp +Compare with ../expected_sobel_1.bmp. +frame 1 passed + +********************* +Reading out frame 2 +INFO: Reading data from pipe with 2 pixels in parallel. +INFO: convert to bmp type. +Wrote convolved image ./output_2.bmp +Compare with ../expected_sobel_2.bmp. +frame 2 passed + +********************* +Reading out frame 3 +INFO: Reading data from pipe with 2 pixels in parallel. +INFO: convert to bmp type. +Wrote convolved image ./output_3.bmp +Compare with ../expected_sobel_3.bmp. +frame 3 passed + +********************* +Reading out frame 4 +INFO: Reading data from pipe with 2 pixels in parallel. +INFO: convert to bmp type. +Wrote convolved image ./output_4.bmp +Compare with ../expected_sobel_4.bmp. +frame 4 passed + +Kernel version = 1 (Expected 1) + +Finished checking a sequence of good frames. + + + +****************************************************** +Check a defective frame followed by a good frame... +****************************************************** + +Reading input image ../test_0.bmp +INFO: convert to vvp type. +INFO: WriteFrameToPipe: will end frame early, after 2048 pixels. +INFO: Storing data to pipe with 2 pixels in parallel. +INFO: Storing data to pipe with 2 pixels in parallel. +INFO: Storing dummy pixels to pipe with 2 pixels in parallel. +Info: Wrote 96 dummy streaming beats. + +**************************** +Read out good frame (defective frame overwritten) +Expect some unexpected start-of-packets as the kernel flushes its initial state, and the defective frame. +INFO: Reading data from pipe with 2 pixels in parallel. +INFO: ReadFrameFromPipe(): [i = 0] - expect sop=TRUE eop=FALSE. saw sop=TRUE eop=TRUE. +INFO: Saw unexpected start of packet; reset counters. +INFO: ReadFrameFromPipe(): [i = 0] - expect sop=FALSE eop=FALSE. saw sop=TRUE eop=TRUE. + + + +INFO: ReadFrameFromPipe(): [i = 0] - expect sop=FALSE eop=FALSE. saw sop=TRUE eop=TRUE. +INFO: Saw unexpected start of packet; reset counters. +INFO: ReadFrameFromPipe(): [i = 0] - expect sop=FALSE eop=FALSE. saw sop=TRUE eop=TRUE. +INFO: Saw unexpected start of packet; reset counters. +INFO: ReadFrameFromPipe(): [i = 0] - expect sop=FALSE eop=FALSE. saw sop=TRUE eop=TRUE. +INFO: Saw unexpected start of packet; reset counters. +INFO: ReadFrameFromPipe(): [i = 0] - expect sop=FALSE eop=FALSE. saw sop=TRUE eop=TRUE. +INFO: Saw unexpected start of packet; reset counters. +INFO: ReadFrameFromPipe(): [i = 0] - expect sop=FALSE eop=FALSE. saw sop=TRUE eop=FALSE. +INFO: Saw unexpected start of packet; reset counters. +INFO: ReadFrameFromPipe(): [i = 0] - expect sop=FALSE eop=FALSE. saw sop=TRUE eop=FALSE. +INFO: convert to bmp type. +Wrote convolved image ./output_defect.bmp +frame 'defect' passed + +Overall result: PASSED +``` +## License +Code samples are licensed under the MIT license. See +[License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details. + +Third party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt). + diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/assets/FIR2d_pip1.svg b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/assets/FIR2d_pip1.svg new file mode 100644 index 0000000000..afe29307cc --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/assets/FIR2d_pip1.svg @@ -0,0 +1,375 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Page-1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Plain.1035 + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1036 + + Sheet.1037 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1038 + Window (Registers) + + Sheet.1039 + + + + + + + + + + + + + + + + + + + + + + + + Window (Registers) + + + + Rectangle + Row Buffer + + + + + + + Row Buffer + + Rectangle.3 + Row Buffer + + + + + + + Row Buffer + + Square.1033 + Pixel + + + + + + + Pixel + + Trapezoid.1010 + Window Function + + + + + + + Window Function + + Dynamic connector.1020 + + + + Dynamic connector.1022 + + + + Dynamic connector.1023 + + + + Dynamic connector.1024 + + + + Dynamic connector.1025 + + + + Dynamic connector.1026 + + + + Dynamic connector.1027 + + + + Dynamic connector.1028 + + + + Dynamic connector.1029 + + + + Dynamic connector.1030 + + + + Dynamic connector.1031 + + + + Square.6 + + + + + + + Square.1000 + + + + + + + Square.1001 + + + + + + + Square.1002 + + + + + + + Square.1003 + + + + + + + Square.1004 + + + + + + + Square.1005 + + + + + + + Square.1006 + + + + + + + Square.1007 + + + + + + + Dynamic connector.1040 + + + + diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/assets/FIR2d_pip2.svg b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/assets/FIR2d_pip2.svg new file mode 100644 index 0000000000..e31a8f6fd2 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/assets/FIR2d_pip2.svg @@ -0,0 +1,820 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Page-1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Plain.1035 + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1036 + + Sheet.1037 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1038 + Window (Registers) + + Sheet.1039 + + + + + + + + + + + + + + + + + + + + + + + + Window (Registers) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Plain.1052 + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1053 + + Sheet.1054 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1055 + Window 2 + + Sheet.1056 + + + + + + + + + + + + + + + + + + + + + + + + Window 2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Plain.1047 + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1048 + + Sheet.1049 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1050 + Window 1 + + Sheet.1051 + + + + + + + + + + + + + + + + + + + + + + + + Window 1 + + + + Rectangle + Row Buffer + + + + + + + Row Buffer + + Rectangle.3 + Row Buffer + + + + + + + Row Buffer + + Trapezoid.1010 + Window Function + + + + + + + Window Function + + Dynamic connector.1022 + + + + Dynamic connector.1023 + + + + Dynamic connector.1024 + + + + Dynamic connector.1026 + + + + Dynamic connector.1028 + + + + Dynamic connector.1030 + + + + Square.6 + + + + + + + Square.1000 + + + + + + + Square.1001 + + + + + + + Square.1002 + + + + + + + Square.1003 + + + + + + + Square.1004 + + + + + + + Square.1005 + + + + + + + Square.1006 + + + + + + + Square.1007 + + + + + + + Dynamic connector.1040 + + + + Square.1041 + + + + + + + Square.1042 + + + + + + + Square.1043 + + + + + + + Dynamic connector.1044 + + + + Dynamic connector.1045 + + + + Dynamic connector.1046 + + + + Trapezoid.1057 + Window Function + + + + + + + Window Function + + Dynamic connector.1058 + + + + Sheet.1059 + + Square.1033 + Pixel1 + + + + + + + Pixel1 + + Square.1045 + Pixel0 + + + + + + + Pixel0 + + + Dynamic connector.1064 + + + + Dynamic connector.1065 + + + + Rectangle.1066 + Offset Buffer + + + + + + + Offset Buffer + + Dynamic connector.1067 + + + + Dynamic connector.1068 + + + + Sheet.1069 + + Square.1033 + Pixel1 + + + + + + + Pixel1 + + Square.1045 + Pixel0 + + + + + + + Pixel0 + + + Dynamic connector.1072 + + + + Dynamic connector.1073 + + + + diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/assets/animation_fir_filter.gif b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/assets/animation_fir_filter.gif new file mode 100644 index 0000000000..c93df1e27c Binary files /dev/null and b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/assets/animation_fir_filter.gif differ diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/assets/system.svg b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/assets/system.svg new file mode 100644 index 0000000000..8ed205be71 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/assets/system.svg @@ -0,0 +1,585 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Page-1 + + + + + + Rectangle + Convolution2d + + + + + + + Convolution2d + + Avalon streaming sink.1036 + + Sheet.1020 + Streaming Sink + + + + Streaming Sink + + Dynamic connector.1014 + Ready + + + + + Ready + + Dynamic connector.1015 + Valid + + + + + Valid + + Dynamic connector.1016 + Data + + + + + Data + + Dynamic connector.1017 + <other signals> + + + + + <other signals> + + + Avalon streaming Source.1042 + + Sheet.1026 + Streaming Source + + + + Streaming Source + + + CSR + + Sheet.1032 + + Corner table.1033 + CSR + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CSR + + Data table.1032 + START STATUS ... ARG_0 ... + + + + + + + + + + + + + STARTSTATUS...ARG_0... + + + Sheet.1035 + MM Agent + + + + MM Agent + + + Rectangle.1037 + RGB2Grey + + + + + + + RGB2Grey + + Sheet.1038 + Start + + + + Start + + Sheet.1039 + Done + + + + Done + + Sheet.1040 + Ready_in + + + + Ready_in + + Sheet.1041 + Ready_out + + + + Ready_out + + Sheet.1042 + high + + + + high + + Dynamic connector.1021 + + + + Avalon streaming sink.1046 + + Sheet.1047 + Streaming Sink + + + + Streaming Sink + + Dynamic connector.1014 + Ready + + + + + Ready + + Dynamic connector.1015 + Valid + + + + + Valid + + Dynamic connector.1016 + Data + + + + + Data + + Dynamic connector.1017 + <other signals> + + + + + <other signals> + + + Avalon streaming Source.1052 + + Sheet.1053 + Streaming Source + + + + Streaming Source + + + Rectangle.1055 + Grey2RGB + + + + + + + Grey2RGB + + Sheet.1056 + Start + + + + Start + + Sheet.1057 + Done + + + + Done + + Sheet.1058 + Ready_in + + + + Ready_in + + Sheet.1059 + Ready_out + + + + Ready_out + + Sheet.1060 + high + + + + high + + Dynamic connector.1061 + + + + Avalon streaming sink.1063 + + Sheet.1064 + Streaming Sink + + + + Streaming Sink + + Dynamic connector.1014 + Ready + + + + + Ready + + Dynamic connector.1015 + Valid + + + + + Valid + + Dynamic connector.1016 + Data + + + + + Data + + Dynamic connector.1017 + <other signals> + + + + + <other signals> + + + Avalon streaming Source.1069 + + Sheet.1070 + Streaming Source + + + + Streaming Source + + Dynamic connector.1018 + Ready + + + + + Ready + + Dynamic connector.1019 + Valid + + + + + Valid + + Dynamic connector.1020 + Data + + + + + Data + + Dynamic connector.1021 + <other signals> + + + + + <other signals> + + + Rectangle.42 + Gasket + + + + + + + Gasket + + AXI4S-Transmitter.50 + + Sheet.1077 + Transmitter + + + + Transmitter + + Dynamic connector.1018 + TREADY + + + + + TREADY + + Dynamic connector.1019 + TVALID + + + + + TVALID + + Dynamic connector.1020 + TDATA + + + + + TDATA + + Dynamic connector.1021 + TUSER + + + + + TUSER + + Dynamic connector.12 + TLAST + + + + + TLAST + + + Avalon streaming sink.1037 + Streaming Sink + + + + Streaming Sink + + Rectangle.1023 + Gasket + + + + + + + Gasket + + Sheet.1085 + Streaming Source + + + + Streaming Source + + AXI4S-Receiver.1026 + + Dynamic connector.1018 + TREADY + + + + + TREADY + + Dynamic connector.1019 + TVALID + + + + + TVALID + + Dynamic connector.1020 + TDATA + + + + + TDATA + + Dynamic connector.1021 + TUSER + + + + + TUSER + + Dynamic connector.12 + TLAST + + + + + TLAST + + Sheet.1092 + Receiver + + + + Receiver + + + diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/assets/testbench_architecture.svg b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/assets/testbench_architecture.svg new file mode 100644 index 0000000000..c74521e065 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/assets/testbench_architecture.svg @@ -0,0 +1,1024 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Page-1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Plain.1032 + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1033 + + Sheet.1034 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1035 + Pixel Beat (sycl::ext::intel::experimental::StreamingBeat) + + Sheet.1036 + + + + + + + + + + + + + + + + + + + + + + + + Pixel Beat (sycl::ext::intel::experimental::StreamingBeat) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Plain.1027 + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1028 + + Sheet.1029 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1030 + Pixel Bundle (fpga_tools::DataBundle<PixelType, kParallelPixels>) + + Sheet.1031 + + + + + + + + + + + + + + + + + + + + + + + + Pixel Bundle (fpga_tools::DataBundle<PixelType, kParallelPixels>) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Plain.1076 + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1077 + + Sheet.1078 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1079 + Pixel Beat (sycl::ext::intel::experimental::StreamingBeat) + + Sheet.1080 + + + + + + + + + + + + + + + + + + + + + + + + Pixel Beat (sycl::ext::intel::experimental::StreamingBeat) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Plain.1081 + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1082 + + Sheet.1083 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Sheet.1084 + Pixel Bundle (fpga_tools::DataBundle<PixelType, kParallelPixels>) + + Sheet.1085 + + + + + + + + + + + + + + + + + + + + + + + + Pixel Bundle (fpga_tools::DataBundle<PixelType, kParallelPixels>) + + + + Rectangle.1001 + main() + + + + + + + main() + + Rectangle.1004 + Place input image in an array + + + + + + + Place input image in an array + + Rectangle.1005 + Consume output image + + + + + + + Consume output image + + Dynamic connector.1006 + + + + Dynamic connector.1007 + + + + Dynamic connector.1008 + + + + Dynamic connector.1009 + + + + Rectangle.1012 + Pixel_rgb + + + + + + + Pixel_rgb + + Rectangle.1018 + Pixel_rgb + + + + + + + Pixel_rgb + + Sheet.1019 + ... + + + + ... + + Square.1025 + SoP + + + + + + + SoP + + Square.1026 + EoP + + + + + + + EoP + + Dynamic connector.1018 + Ready + + + + + Ready + + Dynamic connector.1019 + Valid + + + + + Valid + + Dynamic connector.1020 + Data + + + + + Data + + Dynamic connector.1021 + SOP/EOP + + + + + SOP/EOP + + Sheet.1065 + + Rectangle.1011 + Kernel + + + + + + + Kernel + + Sheet.1060 + Streaming Source + + + + Streaming Source + + Avalon streaming sink.1039 + + Sheet.1040 + Streaming Sink + + + + Streaming Sink + + + + Sheet.1066 + + Rectangle.1003 + vvp_stream_adapters:: ReadFrameFromPipe + + + + + + + vvp_stream_adapters:: ReadFrameFromPipe + + Sheet.1058 + Streaming Sink + + + + Streaming Sink + + + Dynamic connector.1069 + Ready + + + + + Ready + + Dynamic connector.1070 + Valid + + + + + Valid + + Dynamic connector.1071 + Data + + + + + Data + + Dynamic connector.1072 + SOP/EOP + + + + + SOP/EOP + + Sheet.1073 + + Rectangle.1002 + vvp_stream_adapters:: WriteFrameToPipe + + + + + + + vvp_stream_adapters:: WriteFrameToPipe + + Sheet.1068 + Streaming Source + + + + Streaming Source + + + Dynamic connector.1074 + + + + Dynamic connector.1075 + + + + Rectangle.1086 + Pixel_rgb + + + + + + + Pixel_rgb + + Rectangle.1087 + Pixel_rgb + + + + + + + Pixel_rgb + + Sheet.1088 + ... + + + + ... + + Square.1089 + SoP + + + + + + + SoP + + Square.1090 + EoP + + + + + + + EoP + + Dynamic connector.1091 + + + + Dynamic connector.1092 + + + + diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/include/bmp_tools.hpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/include/bmp_tools.hpp new file mode 100644 index 0000000000..ea7ffda1f8 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/include/bmp_tools.hpp @@ -0,0 +1,457 @@ +// Copyright (c) 2024 Intel Corporation +// SPDX-License-Identifier: MIT + +// bmp_tools.h + +// This header file parses bitmap image files. + +#ifndef BMP_TOOLS_H +#define BMP_TOOLS_H + +#include +#include +#include + +#include +#include + +#ifndef FILENAME_BUF_SIZE +#if defined(_WIN32) || defined(_WIN64) +#define FILENAME_BUF_SIZE _MAX_PATH +#else +#define FILENAME_BUF_SIZE MAX_PATH +#endif +#endif + +// HLS did not allow exception handling, but oneAPI does. + +// #ifndef ENABLE_EXCEPTION_HANDLING +// #define ENABLE_EXCEPTION_HANDLING 0 +// #endif + +namespace bmp_tools { + +enum BmpHeaderField : uint16_t { + BM = 0x4D42, // "BM" + BA = 0x4142, // "BA" + CI = 0x4943, // "CI" + CP = 0x5043, // "CP" + IC = 0x4349, // "IC" + PT = 0x5450 // "PT" +}; + +// pack this struct at the byte level so we can load file data directly into it +#pragma pack(push, 1) +struct BmpFileHeader { + BmpHeaderField header_field; + uint32_t file_size; + uint16_t reserved_0; + uint16_t reserved_1; + uint32_t img_data_offset; +}; +#pragma pack(pop) + +// pack this struct at the byte level so we can load file data directly into it +#pragma pack(push, 1) +struct WindowsBitmapInfoHeader { + uint32_t header_size; // should be 40 bytes + int32_t img_width; + int32_t img_height; + uint16_t img_planes; // must be 1 + uint16_t img_bitdepth; + uint32_t img_compression; + uint32_t img_size; + uint32_t img_resolution_horiz; + uint32_t img_resolution_vert; + uint32_t img_colors; + uint32_t img_important_colors; +}; +#pragma pack(pop) + +// Bitmap header size in bytes +static constexpr int BMP_HEADER_SIZE = + sizeof(WindowsBitmapInfoHeader) + sizeof(BmpFileHeader); + +/// @brief Read a `.bmp` file pointed to by `filename` into memory. The image +/// pixels will be stored as 32-bit unsigned integers. This function will +/// allocate memory and store a pointer to that memory in `img_data`. +/// +/// @paragraph For simplicity, we only support a certain type of BMP file, +/// namely 24-bit Windows-style, with all important colors and a single color +/// plane. +/// +/// @param[in] file_path File path to read from +/// +/// @param[out] img_data A pointer to an array of pixels that has been allocated +/// by `ReadBmp`. The size of the allocated buffer will be equal to `height` * +/// `width` * `sizeof(unsigned int)`. Declare `unsigned int *myData`, then pass +/// `&myData` as an argument to this function. +/// +/// @param[out] height Number of rows in the image that was read +/// +/// @param[out] width Number of columns in the image that was read +/// +/// @return `true` if succeeded, `false` if failed. +inline bool ReadBmp(std::string &file_path, unsigned int **img_data, + int &height, int &width) { + if (nullptr == img_data) { + std::cerr << "ERROR: img_data must not be null." << std::endl; + return false; + } + std::ifstream input_bmp; +#if ENABLE_EXCEPTION_HANDLING + try { +#endif + input_bmp.open(file_path, std::ios::in | std::ios::binary); + if (!input_bmp) { + std::cerr << "ERROR: input file " << file_path << " does not exist." + << std::endl; + return false; + } + +#if ENABLE_EXCEPTION_HANDLING + } catch (std::ios_base::failure &e) { + std::cerr << e.what() << '\n'; + std::cerr << "ERROR: can't open file " << file_path << " for binary reading" + << std::endl; + return false; + } +#endif + + bool failed = false; + + // load file header + BmpFileHeader file_header; + input_bmp.read(reinterpret_cast(&file_header), sizeof(BmpFileHeader)); + + if (file_header.header_field != BmpHeaderField::BM) { + std::cerr << "ERROR: only Windows-format bitmap header is supported. " + "Please convert to a Windows-style bitmap." + << std::endl; + return false; + } + + WindowsBitmapInfoHeader dib_header; + input_bmp.read(reinterpret_cast(&dib_header), + sizeof(WindowsBitmapInfoHeader)); + + width = dib_header.img_width; + height = dib_header.img_height; + + // sanity check that inputs are valid + if (dib_header.img_bitdepth != 24) { + std::cerr << "ERROR: Only 24-bit BMP is supported. Please ensure your BMP " + "uses 24-bit pixels (24)" + << std::endl; + return false; + } + + if (dib_header.img_planes != 1) { + std::cerr << "ERROR: Only 1-plane BMP is supported. Please ensure your BMP " + "uses a single color plane (1)." + << std::endl; + return false; + } + + if (dib_header.img_colors != 0) { + std::cerr << "ERROR: requires 2^n colors. Please ensure your BMP uses the " + "default number of colors (0)." + << std::endl; + return false; + } + + if (dib_header.img_important_colors != 0) { + std::cerr + << "ERROR: all colors should be important. Please ensure your BMP uses " + "the default number of important colors (0)." + << std::endl; + return false; + } + + // check that width*height is also valid + if ((height < 0 || width < 0) || (height * width < 0)) { + std::cerr << "ERROR: got height " << height << ", width " << width + << std::endl; + return false; + } + *img_data = (unsigned int *)malloc(width * height * sizeof(unsigned int)); + + if (!*img_data) { + std::cerr << "ERROR: Failed to allocate memory for img_data." << std::endl; + return false; + } + + // scroll to image data + auto read_bytes = sizeof(WindowsBitmapInfoHeader) + sizeof(BmpFileHeader); + while (read_bytes < file_header.img_data_offset) { + input_bmp.get(); + read_bytes++; + } + + // BMP: Each line must be a multiple of 4 bytes + int padding = (4 - ((width * 3) & 3)) & 3; + int idx = 0; + + // Color order is BGR, read across bottom row, then repeat going up rows + for (int i = 0; i < height; ++i) { + for (int j = 0; j < width; ++j) { + unsigned char b = input_bmp.get(); // B + unsigned char g = input_bmp.get(); // G + + std::ios_base::iostate state = input_bmp.rdstate(); + + bool earlyEOF = state & std::ifstream::eofbit; + unsigned char r = input_bmp.get(); // R + (*img_data)[idx] = (((unsigned int)r << 16) | ((unsigned int)g << 8) | + ((unsigned int)b << 0)); + idx++; + if (earlyEOF) failed |= earlyEOF; + + state = input_bmp.rdstate(); + bool failBit = state & std::ifstream::failbit; + + if (failBit) failed |= failBit; + } + // Discard the padding bytes + for (int j = 0; j < padding; j++) { + input_bmp.get(); + bool earlyEOF = input_bmp.rdstate() & std::ifstream::eofbit; + if (earlyEOF) failed |= earlyEOF; + + bool failBit = input_bmp.rdstate() & std::ifstream::failbit; + if (failBit) failed |= failBit; + } + } + input_bmp.close(); + + if (failed) { + std::cerr << "ERROR: File I/O error" << std::endl; + free(*img_data); + return false; + } + + return true; +} + +/// @brief Store pixels in `img_data` array to a bitmap pointed to by `fileame` +/// @paragraph For simplicity, we only support a certain type of BMP file, +/// namely 24-bit Windows-style, with all important colors and a single color +/// plane. +/// @param[in] file_path Filepath to write to +/// @param[in] img_data An array of pixels to write to a bmp file. The size of +/// the array must be equal to `height` * `width`. +/// @param[in] height Number of rows in the image +/// @param[in] width Number of columns in the image +/// @return `true` if the image was successfully written to the filesystem, +/// `false` if not. +inline bool WriteBmp(std::string &file_path, unsigned int *img_data, int height, + int width) { + // sanity check that inputs are valid, check that width*height is also valid + if ((height < 0 || width < 0) || (height * width > (1 << 30))) { + std::cerr << "ERROR: height " << height << ", width " << width << std::endl; + return false; + } + + unsigned int file_size = width * height * 3 + BMP_HEADER_SIZE; + unsigned char header[BMP_HEADER_SIZE] = { + 0x42, 0x4d, // BMP & DIB + + // size of file in bytes + (static_cast(file_size & 0xff)), + (static_cast((file_size >> 8) & 0xff)), + (static_cast((file_size >> 16) & 0xff)), + (static_cast((file_size >> 24) & 0xff)), + + 0x00, 0x00, 0x00, 0x00, // reserved + 0x36, 0x00, 0x00, 0x00, // offset of start of image data + 0x28, 0x00, 0x00, 0x00, // size of the DIB header + + // width in pixels + (static_cast(width & 0xff)), + (static_cast((width >> 8) & 0xff)), + (static_cast((width >> 16) & 0xff)), + (static_cast((width >> 24) & 0xff)), + + // height in pixels + (static_cast(height & 0xff)), + (static_cast((height >> 8) & 0xff)), + (static_cast((height >> 16) & 0xff)), + (static_cast((height >> 24) & 0xff)), + + 0x01, 0x00, // number of color planes + 0x18, 0x00, // number of bits per pixel + 0x00, 0x00, 0x00, 0x00, // no compression - BI_RGB + 0x00, 0x00, 0x00, 0x00, // image size - dummy 0 for BI_RGB + 0x13, 0x0b, 0x00, 0x00, // horizontal ppm + 0x13, 0x0b, 0x00, 0x00, // vertical ppm + 0x00, 0x00, 0x00, 0x00, // default 2^n colors in palatte + 0x00, 0x00, 0x00, 0x00 // every color is important + }; + // Open file for write + std::ofstream output_bmp; +#if ENABLE_EXCEPTION_HANDLING + try { +#endif + output_bmp.open(file_path); + if (!output_bmp) { + std::cerr << "ERROR: output file " << file_path << " does not exist." + << std::endl; + return false; + } +#if ENABLE_EXCEPTION_HANDLING + } catch (std::ios_base::failure &e) { + std::cerr << e.what() << '\n'; + std::cerr << "ERROR: can't open file " << file_path << " for binary writing" + << std::endl; + return false; + } +#endif + + // Write header + output_bmp.write(reinterpret_cast(header), BMP_HEADER_SIZE); + bool write_err = (output_bmp.rdstate() != std::ofstream::goodbit); + if (write_err) { + std::cerr << "ERROR: could not write header to " << file_path << std::endl; + return false; + } + + // Write data: Line size must be a multiple of 4 bytes + int padding = (4 - ((width * 3) & 3)) & 3; + unsigned int idx = 0; + for (int i = 0; i < height; ++i) { + unsigned char p[3]; + for (int j = 0; j < width; ++j) { + // written in B, G, R order + p[0] = (img_data[idx] >> 0) & 0xff; // B + p[1] = (img_data[idx] >> 8) & 0xff; // G + p[2] = (img_data[idx] >> 16) & 0xff; // R + idx++; + + output_bmp.write(reinterpret_cast(p), 3); + bool write_err = (output_bmp.rdstate() != std::ofstream::goodbit); + if (write_err) { + std::cerr << "ERROR: could not write data to " << file_path << std::endl; + return false; + } + } + // Pad to multiple of 4 bytes + if (padding) { + p[0] = p[1] = p[2] = 0; + output_bmp.write(reinterpret_cast(p), 3); + bool write_err = (output_bmp.rdstate() != std::ofstream::goodbit); + if (write_err) { + std::cerr << "ERROR: could not write padding to " << file_path + << std::endl; + return false; + } + } + } + output_bmp.close(); + return true; +} + +/// @brief This convenience struct lets you manipulate color channels within +/// pixels used by bmp_tools functions. +struct PixelRGB { + // blue is least significant + uint8_t b; + uint8_t g; + uint8_t r; + uint8_t reserved; + + /// @brief Construct a `bmp_tools::PixelRGB` using an unsigned 32-bit pixel + /// used by `bmp_tools` functions. + /// @param img_pixel An unsigned 32-bit pixel used by `bmp_tools` functions + PixelRGB(uint32_t img_pixel) { + b = (img_pixel >> 0) & 0xff; + g = (img_pixel >> 8) & 0xff; + r = (img_pixel >> 16) & 0xff; + } + + /// @brief Default constructor that initializes all color channels to `0`. + PixelRGB() { PixelRGB(0); } + + /// @brief Construct a `bmp_tools::PixelRGB` using members + /// @param m_r Red color channel + /// @param m_g Green color channel + /// @param m_b Blue color channel + PixelRGB(uint8_t m_r, uint8_t m_g, uint8_t m_b) + : b(m_b), g(m_g), r(m_r), reserved(0) {} + + /// @brief Transform a `bmp_tools::PixelRGB` into an unsigned 32-bit pixel used + /// by `bmp_tools` functions. + /// @return An unsigned 32-bit pixel used by `bmp_tools` functions. + uint32_t GetImgPixel() { + uint32_t img_pixel = + ((b & 0xff) << 0) + ((g & 0xff) << 8) + ((r & 0xff) << 16); + return img_pixel; + } + + /// @brief Check that two pixels are similar to one another, that is, that + /// they differ by no more than `thresh`. + /// @param other The pixel to compare against + /// @param threshold Maximum amount by any two color channels may deviate + /// @return `true` if `this` and `other` differ by no more than `thresh` + bool CheckSimilarity(PixelRGB other, unsigned char threshold) { + bool similar = true; + if (abs(r - other.r) > threshold) similar = false; + if (abs(g - other.g) > threshold) similar = false; + if (abs(b - other.b) > threshold) similar = false; + + return similar; + } +}; + +/// @brief Compare an array of pixels with a BMP file +/// @param frame An array of unsigned integers representing pixels as understood +/// by the bitmap processing functions in `bmp_tools.hpp`. +/// @param rows Image height +/// @param cols Image width +/// @param expectedFilePath Path to a BMP file to compare `frame` against. The +/// BMP file will be parsed using the functions in `bmp_tools.hpp`. +/// @return `true` if `frame` matches the file pointed to by `expectedFilePath + bool CompareFrames(unsigned int *frame, int rows, int cols, + std::string &expectedFilePath, unsigned char threshold = 2) { + unsigned int *exp_img = nullptr; + int exp_rows, exp_cols; + bool passed = false; + if (bmp_tools::ReadBmp(expectedFilePath, &exp_img, exp_rows, exp_cols)) { + // check dimensions + bool dims_ok = (rows == exp_rows) && (cols == exp_cols); + if (!dims_ok) { + std::cerr << "ERROR: output dimensions (" << cols << ", " << rows + << ") do not match expected dimensions (" << exp_cols << ", " + << exp_rows << ")." << std::endl; + } + + bool frame_ok = (nullptr != frame); + if (!frame_ok) { + std::cerr << "ERROR: frame pointer invalid." << std::endl; + } + + bool exp_ok = (nullptr != exp_img); + if (!exp_ok) { + std::cerr << "ERROR: exp_img pointer invalid." << std::endl; + } + bool pointers_ok = exp_ok & frame_ok; + + passed = dims_ok & pointers_ok; + // compare image data + if (passed) { + for (int i = 0; i < (rows * cols); ++i) { + // Allow for some error due to fpc and fp-relaxed + passed &= PixelRGB(frame[i]).CheckSimilarity(PixelRGB(exp_img[i]), + threshold); + } + } + } else { + std::cerr << "ERROR: problem reading expected image " << expectedFilePath + << std::endl; + } + free(exp_img); + return passed; +} + +} // namespace bmp_tools + +#endif // BMP_TOOLS_H diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/include/comparisons.hpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/include/comparisons.hpp new file mode 100644 index 0000000000..b5c5f0485b --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/include/comparisons.hpp @@ -0,0 +1,20 @@ +// Copyright (c) 2024 Intel Corporation +// SPDX-License-Identifier: MIT + +// comparisons.hpp + +#ifndef __COMPARISONS_HPP__ +#define __COMPARISONS_HPP__ + +namespace fpga_tools { +template +constexpr T Min(T a, T b) { + return (a < b) ? a : b; +} + +template +constexpr T Max(T a, T b) { + return (a > b) ? a : b; +} +} // namespace fpga_tools +#endif \ No newline at end of file diff --git a/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/include/data_bundle.hpp b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/include/data_bundle.hpp new file mode 100644 index 0000000000..fea363fd80 --- /dev/null +++ b/DirectProgramming/C++SYCL_FPGA/ReferenceDesigns/convolution2d/include/data_bundle.hpp @@ -0,0 +1,190 @@ +// Copyright (c) 2024 Intel Corporation +// SPDX-License-Identifier: MIT + +// data_bundle.hpp + +#ifndef __DATA_BUNDLE_HPP__ +#define __DATA_BUNDLE_HPP__ + +namespace fpga_tools { + +///////////////////////////////////////////////////////////////////////////////// +// C++ magic that lets us extract template parameters from DataBundle instances +///////////////////////////////////////////////////////////////////////////////// + +/// @brief Extract template parameters from a DataBundle. Usage: `constexpr int +/// kPixelsInParallel = +/// fpga_tools::ExtractDataBundleType::kBundlePayloadCount;` +/// @tparam T The DataBundle whose parameters you wish to extract +template +struct ExtractDataBundleType { + typedef T BundlePayloadT; + static constexpr int kBundlePayloadCount = 0; +}; + +/// @brief Extract template parameters from a DataBundle. Usage: +/// `constexpr int kPixelsInParallel = +/// fpga_tools::ExtractDataBundleType::kBundlePayloadCount;` +/// @tparam T The DataBundle whose parameters you wish to extract +template