diff --git a/.clang-format b/.clang-format
new file mode 100644
index 00000000..c9eee03f
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,11 @@
+---
+Language: Cpp
+BasedOnStyle: Google
+ColumnLimit: 80
+IncludeBlocks: Preserve
+DerivePointerAlignment: false
+PointerAlignment: Left
+---
+Language: Proto
+BasedOnStyle: Google
+...
diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000..920555f1
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,2 @@
+# Don't copy build files into a docker image
+*/build*
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..e37fefa1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,47 @@
+**/build
+
+
+# Prerequisites
+*.d
+
+# Compiled Object files
+*.slo
+*.lo
+*.o
+*.obj
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Compiled Dynamic libraries
+*.so
+*.dylib
+*.dll
+
+# Fortran module files
+*.mod
+*.smod
+
+# Compiled Static libraries
+*.lai
+*.la
+*.a
+*.lib
+
+# Executables
+*.exe
+*.out
+*.app
+
+# IDE stuff
+*.vscode
+
+# Build files
+*build
+
+# Python
+*.pyc
+
+# Reconstructions
+*.ply
diff --git a/Dockerfile.build b/Dockerfile.build
new file mode 100644
index 00000000..93de20df
--- /dev/null
+++ b/Dockerfile.build
@@ -0,0 +1,10 @@
+FROM nvblox_deps
+
+SHELL ["/bin/bash", "-c"]
+
+# Copy over the nvblox source
+COPY . nvblox
+
+# Build the standalone library
+RUN cd nvblox/nvblox && mkdir build && cd build && \
+    cmake .. && make -j8
diff --git a/Dockerfile.deps b/Dockerfile.deps
new file mode 100644
index 00000000..8f73c372
--- /dev/null
+++ b/Dockerfile.deps
@@ -0,0 +1,20 @@
+FROM nvcr.io/nvidia/cuda:11.2.0-devel-ubuntu20.04
+
+# TZData goes first.
+RUN apt-get update
+ENV TZ Europe/Berlin
+ENV DEBIAN_FRONTEND noninteractive
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+RUN apt-get install -y tzdata
+
+# Install basics.
+RUN apt-get update
+RUN apt-get install -y ssh git jq gnupg apt-utils software-properties-common build-essential cmake
+
+# Install dependencies.
+RUN apt-get install -y libgoogle-glog-dev libgtest-dev curl
+
+# Build and install gtest (the Ubuntu package ships only the sources).
+RUN cd /usr/src/googletest && cmake . && cmake --build . --target install
+
+ENV DEBIAN_FRONTEND teletype
diff --git a/Dockerfile.test b/Dockerfile.test
new file mode 100644
index 00000000..4e51411d
--- /dev/null
+++ b/Dockerfile.test
@@ -0,0 +1,5 @@
+FROM nvblox
+
+# Run the standalone library's tests
+CMD cd nvblox/nvblox/build/tests && ctest
diff --git a/Jenkinsfile b/Jenkinsfile
new file mode 100644
index 00000000..3539f66a
--- /dev/null
+++ b/Jenkinsfile
@@ -0,0 +1,65 @@
+pipeline {
+  agent {
+    dockerfile {
+      label 'isaac-gpu'
+      reuseNode true
+      filename 'Dockerfile.deps'
+      args '-u root --gpus all -v /var/run/docker.sock:/var/run/docker.sock:rw'
+    }
+  }
+  triggers {
+    gitlab(triggerOnMergeRequest: true, branchFilterType: 'All')
+  }
+  stages {
+    stage('Compile') {
+      steps {
+        sh '''nvidia-smi'''
+        sh '''mkdir -p nvblox/build'''
+        sh '''cd nvblox/build && cmake .. && make -j8'''
+      }
+    }
+    stage('Test') {
+      steps {
+        sh '''cd nvblox/build/tests && ctest -T test --no-compress-output'''
+      }
+    }
+  }
+  post {
+    always {
+      // Archive the CTest xml output
+      archiveArtifacts (
+        artifacts: 'nvblox/build/tests/Testing/**/*.xml',
+        fingerprint: true
+      )
+
+      // Process the CTest xml output with the xUnit plugin
+      xunit (
+        testTimeMargin: '3000',
+        thresholdMode: 1,
+        thresholds: [
+          skipped(failureThreshold: '0'),
+          failed(failureThreshold: '0')
+        ],
+        tools: [CTest(
+          pattern: 'nvblox/build/tests/Testing/**/*.xml',
+          deleteOutputFiles: true,
+          failIfNotNew: false,
+          skipNoTestFiles: true,
+          stopProcessingIfError: true
+        )]
+      )
+
+      // Clear the source and build dirs before next run
+      cleanWs()
+    }
+    failure {
+      updateGitlabCommitStatus name: 'build', state: 'failed'
+    }
+    success {
+      updateGitlabCommitStatus name: 'build', state: 'success'
+    }
+  }
+  options {
+    gitLabConnection('gitlab-master')
+  }
+}
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..00f4f26c
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,237 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2022 NVIDIA CORPORATION + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +============================================================================== +voxblox license for some of the source files. +============================================================================== +Copyright (c) 2016, ETHZ ASL +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of voxblox nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+
+
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..cc166333
--- /dev/null
+++ b/README.md
@@ -0,0 +1,154 @@
+# nvblox
+Signed Distance Functions (SDFs) on NVIDIA GPUs.
+
+An SDF library which offers
+* Support for storage of various voxel types
+* GPU-accelerated algorithms such as:
+    * TSDF construction
+    * ESDF construction
+* ROS2 interface (see [isaac_ros_nvblox](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_nvblox))
+* ~~Python bindings~~ (coming soon)
+
+Do we need another SDF library? That depends on your use case. If you're interested in:
+* **Path planning**: We provide GPU-accelerated, incremental algorithms for calculating the Euclidean Signed Distance Field (ESDF), which is useful for collision checking, a critical operation in robotic path planning (see the sketch after this list). In contrast, existing GPU-accelerated libraries target reconstruction only and are generally not useful in a robotics context.
+* **GPU acceleration**: Our previous works [voxblox](https://github.com/ethz-asl/voxblox) and [voxgraph](https://github.com/ethz-asl/voxgraph) are used for path planning, but are CPU-only, which limits their speed (and therefore the resolution of the maps they can build in real time).
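+
+As a taste of what an ESDF gives a planner, here is a minimal sketch of querying distances at arbitrary 3D points. It is based on the Python API exercised by the in-tree example `nvblox/examples/python/3dmatch_example.py`; the standalone Python bindings are still marked "coming soon" above, and `tsdf_layer` is assumed to have been built beforehand (e.g. as in that example), so treat this as illustrative rather than a stable API:
+```
+import numpy as np
+import nvblox
+
+# Build an ESDF layer from an existing TSDF reconstruction (tsdf_layer,
+# assumed built as in examples/python/3dmatch_example.py).
+esdf_layer = nvblox.EsdfLayer(tsdf_layer.block_size,
+                              memory_type=nvblox.MemoryType.Unified)
+esdf_integrator = nvblox.EsdfIntegrator()
+esdf_integrator.integrate_layer(tsdf_layer, esdf_layer)
+
+# Query the distance field at candidate points (Nx3, float32).
+query_points = np.array([[0.0, 0.0, 0.0],
+                         [1.0, 0.5, 0.2]], dtype=np.float32)
+distances, valid = nvblox.interpolate_on_cpu(query_points, esdf_layer)
+# A query point is collision-free (up to the map resolution) if its distance
+# exceeds the robot radius; `valid` flags queries that fell in observed space.
+```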
+Here we show slices through a distance function generated from *nvblox* using data from the [3DMatch dataset](https://3dmatch.cs.princeton.edu/).
+![slice](docs/images/nvblox_slice.gif)
+
+# Note from the authors
+This package is under active development. Feel free to open an issue for bugs or feature requests, and we always welcome pull requests!
+
+# ROS2 Interface
+This repo contains the core library which can be linked into users' projects. If you want to use nvblox on a robot out-of-the-box, please see our [ROS2 interface](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_nvblox), which downloads and builds the core library during installation.
+
+# Native Installation
+If you want to build natively, please follow these instructions. Instructions for docker are [further below](#docker).
+
+## Install dependencies
+We depend on:
+- gtest
+- glog
+- gflags (to run experiments)
+- CUDA 10.2 - 11.5 (others might work but are untested)
+- Eigen (no need to install explicitly; a recent version is downloaded and built as part of the library build)
+
+Please run
+```
+sudo apt-get install -y libgoogle-glog-dev libgtest-dev libgflags-dev python3-dev
+cd /usr/src/googletest && sudo cmake . && sudo cmake --build . --target install
+```
+
+## Build and run tests
+```
+cd nvblox/nvblox
+mkdir build
+cd build
+cmake .. && make && ctest
+```
+
+## Run an example
+In this example we fuse data from the [3DMatch dataset](https://3dmatch.cs.princeton.edu/). First let's grab the dataset. Here I'm downloading it to my dataset folder `~/datasets/3dmatch`.
+```
+wget http://vision.princeton.edu/projects/2016/3DMatch/downloads/rgbd-datasets/sun3d-mit_76_studyroom-76-1studyroom2.zip -P ~/datasets/3dmatch
+unzip ~/datasets/3dmatch/sun3d-mit_76_studyroom-76-1studyroom2.zip -d ~/datasets/3dmatch
+```
+Navigate to and run the `fuse_3dmatch` binary. From the nvblox base folder run
+```
+cd nvblox/build/experiments
+./fuse_3dmatch ~/datasets/3dmatch/sun3d-mit_76_studyroom-76-1studyroom2/ --esdf_frame_subsampling 3000 --mesh_output_path mesh.ply
+```
+Once it's done we can view the output mesh using the Open3D viewer.
+```
+pip3 install open3d
+python3 ../../visualization/visualize_mesh.py mesh.ply
+```
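+If you'd prefer not to use the bundled script, a minimal Open3D snippet does the same job (this is a sketch using plain Open3D calls, not the script itself):
+```
+import open3d as o3d
+
+# Load the mesh written by fuse_3dmatch and display it.
+mesh = o3d.io.read_triangle_mesh("mesh.ply")
+mesh.compute_vertex_normals()  # needed for shaded rendering
+o3d.visualization.draw_geometries([mesh])
+```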
+You should see a mesh of a room:
+![slice](docs/images/reconstruction_in_docker_trim.png)
+
+# Docker
+
+We have several dockerfiles, each layering on top of the previous one, for the following purposes:
+
+* **Dockerfile.deps**
+    * Installs our dependencies.
+    * This is used in our CI, where the later steps (building and testing) are taken care of by Jenkins (not docker).
+* **Dockerfile.build**
+    * Layers on top of Dockerfile.deps.
+    * Builds our package.
+    * This is where you get off the layer train if you wanna run stuff (and don't care if it's tested).
+* **Dockerfile.test**
+    * Layers on top of Dockerfile.build.
+    * Runs our tests.
+    * Useful for checking if things are likely to pass the tests in CI.
+
+We rely on nvidia-docker. Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) following the instructions on that website.
+
+We use the GPU during build, not only at runtime. In docker's default configuration the GPU is only made available at runtime, so we must set the default runtime. Add `"default-runtime": "nvidia"` to `/etc/docker/daemon.json` such that it looks like:
+```
+{
+    "runtimes": {
+        "nvidia": {
+            "path": "/usr/bin/nvidia-container-runtime",
+            "runtimeArgs": []
+        }
+    },
+    "default-runtime": "nvidia"
+}
+```
+Restart docker
+```
+sudo systemctl restart docker
+```
+Now let's build the Dockerfile.deps image. This image contains our dependencies.
+```
+docker build -t nvblox_deps -f Dockerfile.deps .
+```
+Now let's build Dockerfile.build. This image layers on the last, and actually builds the nvblox library.
+```
+docker build -t nvblox -f Dockerfile.build .
+```
+Now let's run the 3DMatch example inside the docker. Note there's some additional complexity in the `docker run` command such that we can forward X11 to the host (we're going to view a reconstruction in a GUI). Run the container using:
+```
+xhost local:docker
+docker run -it --net=host --env="DISPLAY" -v $HOME/.Xauthority:/root/.Xauthority:rw -v /tmp/.X11-unix:/tmp/.X11-unix:rw nvblox
+```
+Let's download a dataset and run the example (this is largely a repeat of "Run an example" above).
+```
+apt-get update
+apt-get install unzip
+wget http://vision.princeton.edu/projects/2016/3DMatch/downloads/rgbd-datasets/sun3d-mit_76_studyroom-76-1studyroom2.zip -P ~/datasets/3dmatch
+unzip ~/datasets/3dmatch/sun3d-mit_76_studyroom-76-1studyroom2.zip -d ~/datasets/3dmatch
+cd nvblox/nvblox/build/experiments/
+./fuse_3dmatch ~/datasets/3dmatch/sun3d-mit_76_studyroom-76-1studyroom2/ --esdf_frame_subsampling 3000 --mesh_output_path mesh.ply
+```
+Now let's visualize. From the same experiments folder run:
+```
+apt-get install python3-pip libgl1-mesa-glx
+pip3 install open3d
+python3 ../../visualization/visualize_mesh.py mesh.ply
+```
+
+# Additional instructions for Jetson Xavier
+These instructions are for a native build on the Jetson Xavier. A Docker based build is coming soon.
+
+The instructions for the native build above work, with one exception:
+
+We build using CMake's modern CUDA integration and therefore require a more modern version of CMake than (currently) ships with JetPack. Luckily, the CMake developers provide a means of obtaining recent versions through apt.
+
+1. Obtain a copy of the signing key:
+```
+wget -qO - https://apt.kitware.com/keys/kitware-archive-latest.asc |
+    sudo apt-key add -
+```
+2. Add the repository to your sources list and update.
+```
+sudo apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main'
+sudo apt-get update
+```
+3. Install the latest CMake.
+```
+sudo apt-get install cmake
+```
+
+# License
+This code is under an [open-source license](LICENSE) (Apache 2.0). :)
\ No newline at end of file
diff --git a/docs/images/nvblox_slice.gif b/docs/images/nvblox_slice.gif
new file mode 100644
index 00000000..a0bf4c1b
Binary files /dev/null and b/docs/images/nvblox_slice.gif differ
diff --git a/docs/images/reconstruction_in_docker_trim.png b/docs/images/reconstruction_in_docker_trim.png
new file mode 100644
index 00000000..8c7bda97
Binary files /dev/null and b/docs/images/reconstruction_in_docker_trim.png differ
diff --git a/nvblox/AMENT_IGNORE b/nvblox/AMENT_IGNORE
new file mode 100644
index 00000000..e69de29b
diff --git a/nvblox/CMakeLists.txt b/nvblox/CMakeLists.txt
new file mode 100644
index 00000000..3babb997
--- /dev/null
+++ b/nvblox/CMakeLists.txt
@@ -0,0 +1,228 @@
+cmake_minimum_required(VERSION 3.16)
+
+# set the project name and version
+project(nvblox VERSION 0.0.1 LANGUAGES CXX CUDA)
+
+# specify the C++ standard
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD_REQUIRED True)
+set(CMAKE_BUILD_TYPE RelWithDebInfo)
+
+# Build options
+option(BUILD_EXPERIMENTS "Build performance experimentation binaries" ON)
+option(BUILD_TESTS "Build the C++ tests of the nvblox library" ON)
+
+# Suppress spammy Eigen CUDA warnings.
+# "expt-relaxed-constexpr" allows sharing constexpr between host and device
+# code.
+# "generate-line-info" embeds source line information for profiling/debugging.
+# "display_error_number" shows a warning number with all warnings, and the
+# rest is just suppressing specific warnings from Eigen. Note that the numbers
+# keep changing with every CUDA release so this list is a bit arbitrary.
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --generate-line-info -Xcudafe --display_error_number -Xcudafe --diag_suppress=2977 -Xcudafe --diag_suppress=3057 -Xcudafe --diag_suppress=3059 ")
+set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --compiler-options -fPIC")
+
+# New warning numbers above CUDA 11.2.
+if (CUDA_VERSION_MAJOR EQUAL 11 AND CUDA_VERSION_MINOR GREATER_EQUAL 2)
+  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcudafe --diag_suppress=20012 -Xcudafe --diag_suppress=20011 -Xcudafe --diag_suppress=20014")
+endif()
+
+# Download thirdparty deps
+message(STATUS "Downloading 3rdparty dependencies")
+message(STATUS "Downloading Eigen")
+include(thirdparty/eigen/eigen.cmake)
+
+# Build stdgpu as part of this
+message(STATUS "Downloading STDGPU")
+include(thirdparty/stdgpu/stdgpu.cmake)
+
+# Include the dependencies
+list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
+find_package(CUDA REQUIRED)
+# In the case of ROS builds, glog will likely be found at a higher level.
+# We want to link against that version in that case.
+if(NOT Glog_FOUND)
+  find_package(Glog REQUIRED)
+endif()
+# TODO(alexmillane): check the ROS builds here.
+find_package(gflags REQUIRED)
+
+# Include dirs
+include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+include_directories(${EIGEN_INCLUDE_DIRS})
+include_directories(${STDGPU_INCLUDE_DIRS})
+include_directories(include)
+
+#############
+# LIBRARIES #
+#############
+
+add_library(nvblox_cuda_check SHARED
+  src/core/cuda/error_check.cu
+)
+
+add_library(nvblox_gpu_hash SHARED
+  src/gpu_hash/cuda/gpu_layer_view.cu
+  src/gpu_hash/cuda/gpu_set.cu
+  src/utils/timing.cpp
+  src/utils/nvtx_ranges.cpp
+)
+add_dependencies(nvblox_gpu_hash ext_eigen stdgpu)
+target_link_libraries(nvblox_gpu_hash PUBLIC
+  stdgpu
+  nvblox_cuda_check
+  ${CUDA_nvToolsExt_LIBRARY}
+)
+
+add_library(nvblox_lib SHARED
+  src/core/bounding_boxes.cpp
+  src/core/camera.cpp
+  src/core/color.cpp
+  src/core/cuda/blox.cu
+  src/core/cuda/image_cuda.cu
+  src/core/cuda/warmup.cu
+  src/core/image.cpp
+  src/core/interpolation_3d.cpp
+  src/core/mapper.cpp
+  src/datasets/image_loader.cpp
+  src/datasets/parse_3dmatch.cpp
+  src/integrators/cuda/frustum.cu
+  src/integrators/cuda/projective_tsdf_integrator.cu
+  src/integrators/cuda/projective_color_integrator.cu
+  src/integrators/cuda/esdf_integrator.cu
+  src/integrators/esdf_integrator.cpp
+  src/integrators/frustum.cpp
+  src/integrators/projective_integrator_base.cpp
+  src/ray_tracing/cuda/sphere_tracer.cu
+  src/io/csv.cpp
+  src/io/mesh_io.cpp
+  src/io/ply_writer.cpp
+  src/mesh/marching_cubes.cu
+  src/mesh/mesh_block.cu
+  src/mesh/mesh_integrator_color.cu
+  src/mesh/mesh_integrator.cpp
+  src/mesh/mesh_integrator.cu
+  src/mesh/mesh.cpp
+  src/primitives/primitives.cpp
+  src/primitives/scene.cpp
+  src/utils/nvtx_ranges.cpp
+  src/utils/timing.cpp
+)
+add_dependencies(nvblox_lib ext_eigen)
+target_link_libraries(nvblox_lib PUBLIC
+  ${GLOG_LIBRARIES}
+  gflags
+  ${CUDA_LIBRARIES}
+  ${CUDA_nvToolsExt_LIBRARY}
+  nvblox_gpu_hash
+  nvblox_cuda_check
+)
+target_link_libraries(nvblox_lib INTERFACE
+  stdgpu
+)
+target_include_directories(nvblox_lib PUBLIC
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+  $<INSTALL_INTERFACE:include>
+  $<BUILD_INTERFACE:${EIGEN_INCLUDE_DIRS}>
+  $<INSTALL_INTERFACE:include/eigen3>
+  ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
+)
+set_target_properties(nvblox_lib PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
+############
+# BINARIES #
+############
+add_executable(sphere_benchmark src/benchmarks/sphere_benchmark.cpp)
+target_link_libraries(sphere_benchmark
+  nvblox_lib
+)
+set_target_properties(sphere_benchmark PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
+#########
+# TESTS #
+#########
+
+if (BUILD_TESTS)
+  add_subdirectory(tests)
+endif()
+
+###############
+# EXPERIMENTS #
+###############
+
+if (BUILD_EXPERIMENTS)
+  add_subdirectory(experiments)
+endif()
+
+#############################
+# INTERFACE LIBRARY FOR ROS #
+#############################
+# TODO: delete
+add_library(nvblox_interface INTERFACE)
+target_link_libraries(nvblox_interface INTERFACE
+  nvblox_lib
+  nvblox_gpu_hash
+  nvblox_cuda_check
+  ${GLOG_LIBRARIES}
+  ${CUDA_LIBRARIES}
+  stdgpu
+  Eigen3::Eigen
+)
+target_include_directories(nvblox_interface INTERFACE include ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+add_dependencies(nvblox_interface ext_eigen ext_stdgpu)
+
+##########
+# EXPORT #
+##########
+include(GNUInstallDirs)
+
+install(
+  TARGETS nvblox_lib nvblox_gpu_hash nvblox_cuda_check stdgpu
+  EXPORT nvblox
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
+  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
+  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}/${PROJECT_NAME}
+  INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}
+  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}
+)
+install(
+  DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/
+  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+  FILES_MATCHING PATTERN "*.h*"
+)
+
+
+include(CMakePackageConfigHelpers)
+# generate the config file that includes the exports
+configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/Config.cmake.in
+  "${CMAKE_CURRENT_BINARY_DIR}/nvbloxConfig.cmake"
+  INSTALL_DESTINATION "lib/cmake"
+  NO_SET_AND_CHECK_MACRO
+  NO_CHECK_REQUIRED_COMPONENTS_MACRO
+)
+# generate the version file for the config file
+write_basic_package_version_file(
+  "${CMAKE_CURRENT_BINARY_DIR}/nvbloxConfigVersion.cmake"
+  VERSION "${nvblox_VERSION_MAJOR}.${nvblox_VERSION_MINOR}"
+  COMPATIBILITY AnyNewerVersion
+)
+
+# install the configuration file
+install(FILES
+  ${CMAKE_CURRENT_BINARY_DIR}/nvbloxConfig.cmake
+  ${CMAKE_CURRENT_BINARY_DIR}/nvbloxConfigVersion.cmake
+  ${CMAKE_CURRENT_BINARY_DIR}/nvbloxTargets.cmake
+  DESTINATION share/nvblox/cmake)
+
+# generate the export targets for the build tree
+# needs to be after the install(TARGETS ) command
+export(EXPORT nvblox
+  NAMESPACE nvblox::
+  FILE "${CMAKE_CURRENT_BINARY_DIR}/nvbloxTargets.cmake"
+)
+
+install(
+  EXPORT nvblox
+  FILE nvbloxTargets.cmake
+  NAMESPACE nvblox::
+  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+)
diff --git a/nvblox/cmake/Config.cmake.in b/nvblox/cmake/Config.cmake.in
new file mode 100644
index 00000000..38c57549
--- /dev/null
+++ b/nvblox/cmake/Config.cmake.in
@@ -0,0 +1,4 @@
+
+@PACKAGE_INIT@
+
+include ( "${CMAKE_CURRENT_LIST_DIR}/nvbloxTargets.cmake" )
diff --git a/nvblox/cmake/FindGlog.cmake b/nvblox/cmake/FindGlog.cmake
new file mode 100644
index 00000000..1a7b6c09
--- /dev/null
+++ b/nvblox/cmake/FindGlog.cmake
@@ -0,0 +1,379 @@
+# Ceres Solver - A fast non-linear least squares minimizer
+# Copyright 2015 Google Inc. All rights reserved.
+# http://ceres-solver.org/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# * Neither the name of Google Inc. nor the names of its contributors may be
+#   used to endorse or promote products derived from this software without
+#   specific prior written permission.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# Author: alexs.mac@gmail.com (Alex Stewart) +# + +# FindGlog.cmake - Find Google glog logging library. +# +# This module defines the following variables: +# +# GLOG_FOUND: TRUE iff glog is found. +# GLOG_INCLUDE_DIRS: Include directories for glog. +# GLOG_LIBRARIES: Libraries required to link glog. +# FOUND_INSTALLED_GLOG_CMAKE_CONFIGURATION: True iff the version of glog found +# was built & installed / exported +# as a CMake package. +# +# The following variables control the behaviour of this module: +# +# GLOG_PREFER_EXPORTED_GLOG_CMAKE_CONFIGURATION: TRUE/FALSE, iff TRUE then +# then prefer using an exported CMake configuration +# generated by glog > 0.3.4 over searching for the +# glog components manually. Otherwise (FALSE) +# ignore any exported glog CMake configurations and +# always perform a manual search for the components. +# Default: TRUE iff user does not define this variable +# before we are called, and does NOT specify either +# GLOG_INCLUDE_DIR_HINTS or GLOG_LIBRARY_DIR_HINTS +# otherwise FALSE. +# GLOG_INCLUDE_DIR_HINTS: List of additional directories in which to +# search for glog includes, e.g: /timbuktu/include. +# GLOG_LIBRARY_DIR_HINTS: List of additional directories in which to +# search for glog libraries, e.g: /timbuktu/lib. +# +# The following variables are also defined by this module, but in line with +# CMake recommended FindPackage() module style should NOT be referenced directly +# by callers (use the plural variables detailed above instead). These variables +# do however affect the behaviour of the module via FIND_[PATH/LIBRARY]() which +# are NOT re-called (i.e. search for library is not repeated) if these variables +# are set with valid values _in the CMake cache_. This means that if these +# variables are set directly in the cache, either by the user in the CMake GUI, +# or by the user passing -DVAR=VALUE directives to CMake when called (which +# explicitly defines a cache variable), then they will be used verbatim, +# bypassing the HINTS variables and other hard-coded search locations. +# +# GLOG_INCLUDE_DIR: Include directory for glog, not including the +# include directory of any dependencies. +# GLOG_LIBRARY: glog library, not including the libraries of any +# dependencies. + +# Reset CALLERS_CMAKE_FIND_LIBRARY_PREFIXES to its value when +# FindGlog was invoked. 
+macro(GLOG_RESET_FIND_LIBRARY_PREFIX) + if (MSVC AND CALLERS_CMAKE_FIND_LIBRARY_PREFIXES) + set(CMAKE_FIND_LIBRARY_PREFIXES "${CALLERS_CMAKE_FIND_LIBRARY_PREFIXES}") + endif() +endmacro(GLOG_RESET_FIND_LIBRARY_PREFIX) + +# Called if we failed to find glog or any of it's required dependencies, +# unsets all public (designed to be used externally) variables and reports +# error message at priority depending upon [REQUIRED/QUIET/] argument. +macro(GLOG_REPORT_NOT_FOUND REASON_MSG) + unset(GLOG_FOUND) + unset(GLOG_INCLUDE_DIRS) + unset(GLOG_LIBRARIES) + # Make results of search visible in the CMake GUI if glog has not + # been found so that user does not have to toggle to advanced view. + mark_as_advanced(CLEAR GLOG_INCLUDE_DIR + GLOG_LIBRARY) + + glog_reset_find_library_prefix() + + # Note _FIND_[REQUIRED/QUIETLY] variables defined by FindPackage() + # use the camelcase library name, not uppercase. + if (Glog_FIND_QUIETLY) + message(STATUS "Failed to find glog - " ${REASON_MSG} ${ARGN}) + elseif (Glog_FIND_REQUIRED) + message(FATAL_ERROR "Failed to find glog - " ${REASON_MSG} ${ARGN}) + else() + # Neither QUIETLY nor REQUIRED, use no priority which emits a message + # but continues configuration and allows generation. + message("-- Failed to find glog - " ${REASON_MSG} ${ARGN}) + endif () + return() +endmacro(GLOG_REPORT_NOT_FOUND) + +# glog_message([mode] "message text") +# +# Wraps the standard cmake 'message' command, but suppresses output +# if the QUIET flag was passed to the find_package(Glog ...) call. +function(GLOG_MESSAGE) + if (NOT Glog_FIND_QUIETLY) + message(${ARGN}) + endif() +endfunction() + +# Protect against any alternative find_package scripts for this library having +# been called previously (in a client project) which set GLOG_FOUND, but not +# the other variables we require / set here which could cause the search logic +# here to fail. +unset(GLOG_FOUND) + +# ----------------------------------------------------------------- +# By default, if the user has expressed no preference for using an exported +# glog CMake configuration over performing a search for the installed +# components, and has not specified any hints for the search locations, then +# prefer a glog exported configuration if available. +if (NOT DEFINED GLOG_PREFER_EXPORTED_GLOG_CMAKE_CONFIGURATION + AND NOT GLOG_INCLUDE_DIR_HINTS + AND NOT GLOG_LIBRARY_DIR_HINTS) + glog_message(STATUS "No preference for use of exported glog CMake " + "configuration set, and no hints for include/library directories provided. " + "Defaulting to preferring an installed/exported glog CMake configuration " + "if available.") + set(GLOG_PREFER_EXPORTED_GLOG_CMAKE_CONFIGURATION TRUE) +endif() + +# On macOS, add the Homebrew prefix (with appropriate suffixes) to the +# respective HINTS directories (after any user-specified locations). This +# handles Homebrew installations into non-standard locations (not /usr/local). +# We do not use CMAKE_PREFIX_PATH for this as given the search ordering of +# find_xxx(), doing so would override any user-specified HINTS locations with +# the Homebrew version if it exists. +if (CMAKE_SYSTEM_NAME MATCHES "Darwin") + find_program(HOMEBREW_EXECUTABLE brew) + mark_as_advanced(FORCE HOMEBREW_EXECUTABLE) + if (HOMEBREW_EXECUTABLE) + # Detected a Homebrew install, query for its install prefix. 
+ execute_process(COMMAND ${HOMEBREW_EXECUTABLE} --prefix + OUTPUT_VARIABLE HOMEBREW_INSTALL_PREFIX + OUTPUT_STRIP_TRAILING_WHITESPACE) + glog_message(STATUS "Detected Homebrew with install prefix: " + "${HOMEBREW_INSTALL_PREFIX}, adding to CMake search paths.") + list(APPEND GLOG_INCLUDE_DIR_HINTS "${HOMEBREW_INSTALL_PREFIX}/include") + list(APPEND GLOG_LIBRARY_DIR_HINTS "${HOMEBREW_INSTALL_PREFIX}/lib") + endif() +endif() + +if (GLOG_PREFER_EXPORTED_GLOG_CMAKE_CONFIGURATION) + # Try to find an exported CMake configuration for glog, as generated by + # glog versions > 0.3.4 + # + # We search twice, s/t we can invert the ordering of precedence used by + # find_package() for exported package build directories, and installed + # packages (found via CMAKE_SYSTEM_PREFIX_PATH), listed as items 6) and 7) + # respectively in [1]. + # + # By default, exported build directories are (in theory) detected first, and + # this is usually the case on Windows. However, on OS X & Linux, the install + # path (/usr/local) is typically present in the PATH environment variable + # which is checked in item 4) in [1] (i.e. before both of the above, unless + # NO_SYSTEM_ENVIRONMENT_PATH is passed). As such on those OSs installed + # packages are usually detected in preference to exported package build + # directories. + # + # To ensure a more consistent response across all OSs, and as users usually + # want to prefer an installed version of a package over a locally built one + # where both exist (esp. as the exported build directory might be removed + # after installation), we first search with NO_CMAKE_PACKAGE_REGISTRY which + # means any build directories exported by the user are ignored, and thus + # installed directories are preferred. If this fails to find the package + # we then research again, but without NO_CMAKE_PACKAGE_REGISTRY, so any + # exported build directories will now be detected. + # + # To prevent confusion on Windows, we also pass NO_CMAKE_BUILDS_PATH (which + # is item 5) in [1]), to not preferentially use projects that were built + # recently with the CMake GUI to ensure that we always prefer an installed + # version if available. + # + # NOTE: We use the NAMES option as glog erroneously uses 'google-glog' as its + # project name when built with CMake, but exports itself as just 'glog'. + # On Linux/OS X this does not break detection as the project name is + # not used as part of the install path for the CMake package files, + # e.g. /usr/local/lib/cmake/glog, where the suffix is hardcoded + # in glog's CMakeLists. However, on Windows the project name *is* + # part of the install prefix: C:/Program Files/google-glog/[include,lib]. + # However, by default CMake checks: + # C:/Program Files/ which does not + # exist and thus detection fails. Thus we use the NAMES to force the + # search to use both google-glog & glog. + # + # [1] http://www.cmake.org/cmake/help/v2.8.11/cmake.html#command:find_package + find_package(glog QUIET + NAMES google-glog glog + HINTS ${glog_DIR} ${HOMEBREW_INSTALL_PREFIX} + NO_MODULE + NO_CMAKE_PACKAGE_REGISTRY + NO_CMAKE_BUILDS_PATH) + if (glog_FOUND) + glog_message(STATUS "Found installed version of glog: ${glog_DIR}") + else() + # Failed to find an installed version of glog, repeat search allowing + # exported build directories. 
+ glog_message(STATUS "Failed to find installed glog CMake configuration, " + "searching for glog build directories exported with CMake.") + # Again pass NO_CMAKE_BUILDS_PATH, as we know that glog is exported and + # do not want to treat projects built with the CMake GUI preferentially. + find_package(glog QUIET + NAMES google-glog glog + NO_MODULE + NO_CMAKE_BUILDS_PATH) + if (glog_FOUND) + glog_message(STATUS "Found exported glog build directory: ${glog_DIR}") + endif(glog_FOUND) + endif(glog_FOUND) + + set(FOUND_INSTALLED_GLOG_CMAKE_CONFIGURATION ${glog_FOUND}) + + if (FOUND_INSTALLED_GLOG_CMAKE_CONFIGURATION) + glog_message(STATUS "Detected glog version: ${glog_VERSION}") + set(GLOG_FOUND ${glog_FOUND}) + # glog wraps the include directories into the exported glog::glog target. + set(GLOG_INCLUDE_DIR "") + set(GLOG_LIBRARY glog::glog) + else (FOUND_INSTALLED_GLOG_CMAKE_CONFIGURATION) + glog_message(STATUS "Failed to find an installed/exported CMake " + "configuration for glog, will perform search for installed glog " + "components.") + endif (FOUND_INSTALLED_GLOG_CMAKE_CONFIGURATION) +endif(GLOG_PREFER_EXPORTED_GLOG_CMAKE_CONFIGURATION) + +if (NOT GLOG_FOUND) + # Either failed to find an exported glog CMake configuration, or user + # told us not to use one. Perform a manual search for all glog components. + + # Handle possible presence of lib prefix for libraries on MSVC, see + # also GLOG_RESET_FIND_LIBRARY_PREFIX(). + if (MSVC) + # Preserve the caller's original values for CMAKE_FIND_LIBRARY_PREFIXES + # s/t we can set it back before returning. + set(CALLERS_CMAKE_FIND_LIBRARY_PREFIXES "${CMAKE_FIND_LIBRARY_PREFIXES}") + # The empty string in this list is important, it represents the case when + # the libraries have no prefix (shared libraries / DLLs). + set(CMAKE_FIND_LIBRARY_PREFIXES "lib" "" "${CMAKE_FIND_LIBRARY_PREFIXES}") + endif (MSVC) + + # Search user-installed locations first, so that we prefer user installs + # to system installs where both exist. + list(APPEND GLOG_CHECK_INCLUDE_DIRS + /usr/local/include + /usr/local/homebrew/include # Mac OS X + /opt/local/var/macports/software # Mac OS X. + /opt/local/include + /usr/include) + # Windows (for C:/Program Files prefix). + list(APPEND GLOG_CHECK_PATH_SUFFIXES + glog/include + glog/Include + Glog/include + Glog/Include + google-glog/include # CMake installs with project name prefix. + google-glog/Include) + + list(APPEND GLOG_CHECK_LIBRARY_DIRS + /usr/local/lib + /usr/local/homebrew/lib # Mac OS X. + /opt/local/lib + /usr/lib) + # Windows (for C:/Program Files prefix). + list(APPEND GLOG_CHECK_LIBRARY_SUFFIXES + glog/lib + glog/Lib + Glog/lib + Glog/Lib + google-glog/lib # CMake installs with project name prefix. + google-glog/Lib) + + # Search supplied hint directories first if supplied. 
+ find_path(GLOG_INCLUDE_DIR + NAMES glog/logging.h + HINTS ${GLOG_INCLUDE_DIR_HINTS} + PATHS ${GLOG_CHECK_INCLUDE_DIRS} + PATH_SUFFIXES ${GLOG_CHECK_PATH_SUFFIXES}) + if (NOT GLOG_INCLUDE_DIR OR + NOT EXISTS ${GLOG_INCLUDE_DIR}) + glog_report_not_found( + "Could not find glog include directory, set GLOG_INCLUDE_DIR " + "to directory containing glog/logging.h") + endif (NOT GLOG_INCLUDE_DIR OR + NOT EXISTS ${GLOG_INCLUDE_DIR}) + + find_library(GLOG_LIBRARY NAMES glog + HINTS ${GLOG_LIBRARY_DIR_HINTS} + PATHS ${GLOG_CHECK_LIBRARY_DIRS} + PATH_SUFFIXES ${GLOG_CHECK_LIBRARY_SUFFIXES}) + if (NOT GLOG_LIBRARY OR + NOT EXISTS ${GLOG_LIBRARY}) + glog_report_not_found( + "Could not find glog library, set GLOG_LIBRARY " + "to full path to libglog.") + endif (NOT GLOG_LIBRARY OR + NOT EXISTS ${GLOG_LIBRARY}) + + # Mark internally as found, then verify. GLOG_REPORT_NOT_FOUND() unsets + # if called. + set(GLOG_FOUND TRUE) + + # Glog does not seem to provide any record of the version in its + # source tree, thus cannot extract version. + + # Catch case when caller has set GLOG_INCLUDE_DIR in the cache / GUI and + # thus FIND_[PATH/LIBRARY] are not called, but specified locations are + # invalid, otherwise we would report the library as found. + if (GLOG_INCLUDE_DIR AND + NOT EXISTS ${GLOG_INCLUDE_DIR}/glog/logging.h) + glog_report_not_found( + "Caller defined GLOG_INCLUDE_DIR:" + " ${GLOG_INCLUDE_DIR} does not contain glog/logging.h header.") + endif (GLOG_INCLUDE_DIR AND + NOT EXISTS ${GLOG_INCLUDE_DIR}/glog/logging.h) + # TODO: This regex for glog library is pretty primitive, we use lowercase + # for comparison to handle Windows using CamelCase library names, could + # this check be better? + string(TOLOWER "${GLOG_LIBRARY}" LOWERCASE_GLOG_LIBRARY) + if (GLOG_LIBRARY AND + NOT "${LOWERCASE_GLOG_LIBRARY}" MATCHES ".*glog[^/]*") + glog_report_not_found( + "Caller defined GLOG_LIBRARY: " + "${GLOG_LIBRARY} does not match glog.") + endif (GLOG_LIBRARY AND + NOT "${LOWERCASE_GLOG_LIBRARY}" MATCHES ".*glog[^/]*") + + glog_reset_find_library_prefix() + +endif(NOT GLOG_FOUND) + +# Set standard CMake FindPackage variables if found. +if (GLOG_FOUND) + set(GLOG_INCLUDE_DIRS ${GLOG_INCLUDE_DIR}) + set(GLOG_LIBRARIES ${GLOG_LIBRARY}) +endif (GLOG_FOUND) + +# If we are using an exported CMake glog target, the include directories are +# wrapped into the target itself, and do not have to be (and are not) +# separately specified. In which case, we should not add GLOG_INCLUDE_DIRS +# to the list of required variables in order that glog be reported as found. +if (FOUND_INSTALLED_GLOG_CMAKE_CONFIGURATION) + set(GLOG_REQUIRED_VARIABLES GLOG_LIBRARIES) +else() + set(GLOG_REQUIRED_VARIABLES GLOG_INCLUDE_DIRS GLOG_LIBRARIES) +endif() + +# Handle REQUIRED / QUIET optional arguments. +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Glog DEFAULT_MSG + ${GLOG_REQUIRED_VARIABLES}) + +# Only mark internal variables as advanced if we found glog, otherwise +# leave them visible in the standard GUI for the user to set manually. 
+if (GLOG_FOUND)
+  mark_as_advanced(FORCE GLOG_INCLUDE_DIR
+                         GLOG_LIBRARY
+                         glog_DIR) # Autogenerated by find_package(glog)
+endif (GLOG_FOUND)
diff --git a/nvblox/examples/python/3dmatch_example.py b/nvblox/examples/python/3dmatch_example.py
new file mode 100755
index 00000000..57a74291
--- /dev/null
+++ b/nvblox/examples/python/3dmatch_example.py
@@ -0,0 +1,135 @@
+#!/usr/bin/python3
+
+import os
+import wget
+from zipfile import ZipFile
+import argparse
+
+import numpy as np
+from matplotlib import pyplot as plt
+import open3d as o3d
+
+from nvblox.datasets.threedmatch.loader import ThreeDMatchLoader
+from nvblox.conversions import open3d_conversions
+import nvblox
+
+
+def get_colored_cubes_mesh(values, XYZ, cube_size):
+    # Normalizing/Clipping the values
+    percentile_lim_upper = 90
+    percentile_lim_lower = 10
+    max_val = np.percentile(values, percentile_lim_upper)
+    min_val = np.percentile(values, percentile_lim_lower)
+    values_normalized = (values - min_val) / (max_val - min_val)
+    values_normalized = values_normalized.clip(min=0.0, max=1.0)
+    # Create the values mesh
+    slice_mesh = o3d.geometry.TriangleMesh()
+    for dist, xyz in zip(values_normalized, XYZ):
+        box = o3d.geometry.TriangleMesh.create_box(width=cube_size,
+                                                   height=cube_size,
+                                                   depth=cube_size)
+        color = plt.cm.viridis(dist)
+        box.compute_vertex_normals()
+        box.paint_uniform_color(color[0:3])
+        box.translate(np.array([xyz[0], xyz[1], xyz[2]]))
+        slice_mesh += box
+    return slice_mesh
+
+
+def main(num_frames: int):
+
+    # Check for data (and download it if required)
+    script_dir = os.path.dirname(os.path.realpath(__file__))
+    data_root = os.path.join(script_dir, "data")
+    dataset_name = 'sun3d-mit_76_studyroom-76-1studyroom2'
+    dataset_dir = os.path.join(data_root, dataset_name)
+    if not os.path.isdir(dataset_dir):
+        url = 'http://vision.princeton.edu/projects/2016/3DMatch/downloads/rgbd-datasets/sun3d-mit_76_studyroom-76-1studyroom2.zip'
+        print('Downloading dataset from: ' + url)
+        wget.download(url, out=data_root)
+        print("Extracting dataset")
+        ZipFile(dataset_dir + '.zip').extractall(path=data_root)
+
+    # Paths to the data
+    sequence_root_path = os.path.join(dataset_dir, "seq-01")
+    camera_intrinsics_path = os.path.join(
+        dataset_dir, "camera-intrinsics.txt")
+    loader = ThreeDMatchLoader(sequence_root_path, camera_intrinsics_path)
+
+    # Create a tsdf_layer (our map/reconstruction)
+    block_size = nvblox.utils.block_size(voxel_size=0.05)
+    tsdf_layer = nvblox.TsdfLayer(
+        block_size, memory_type=nvblox.MemoryType.Device)
+    mesh_layer = nvblox.MeshLayer(
+        block_size, memory_type=nvblox.MemoryType.Unified)
+
+    # TSDF Integrator
+    integrator = nvblox.ProjectiveTsdfIntegrator()
+
+    # Integrating all the frames
+    if num_frames is None:
+        num_frames = loader.get_number_of_frames()
+    for frame_idx in range(num_frames):
+        print(f"Integrating frame: {frame_idx} / {num_frames}")
+        (depth_frame, T_L_C_mat, camera) = loader.get_frame_data(frame_idx)
+        # Skip frames with invalid (NaN) poses or depth.
+        if np.isnan(T_L_C_mat).any() or np.isnan(depth_frame).any():
+            continue
+        integrator.integrateFrame(depth_frame, T_L_C_mat, camera, tsdf_layer)
+
+    # Extract a mesh from the tsdf_layer
+    print("Extracting mesh from TSDF")
+    mesh_integrator = nvblox.mesh.MeshIntegrator()
+    mesh_integrator.integrateMeshFromDistanceField(tsdf_layer, mesh_layer)
+
+    # Generate an ESDF
+    esdf_layer = nvblox.EsdfLayer(
+        tsdf_layer.block_size, memory_type=nvblox.MemoryType.Unified)
+    esdf_integrator = nvblox.EsdfIntegrator()
+    esdf_integrator.integrate_layer(tsdf_layer, esdf_layer)
+
+    # Get the bounding box of the layer
+    aabb = esdf_layer.get_aabb_of_observed_voxels()
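+    # The block below samples the ESDF on a regular horizontal grid spanning
+    # the observed bounding box, then interpolates the distance field at those
+    # points on the CPU; the returned flags mark which query points landed in
+    # observed space (only those get visualized).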
+    # Get the 3D locations of a horizontal slice
+    # Note: 3DMatch is Y-axis up, so a horizontal slice sits at a fixed y value
+    resolution_m = 0.1
+    slice_point_ratio = 0.35
+    slice_point_m = slice_point_ratio * aabb.sizes()[1] + aabb.min[1]
+    X, Z = np.mgrid[aabb.min[0]:aabb.max[0]:resolution_m,
+                    aabb.min[2]:aabb.max[2]:resolution_m]
+    X, Y, Z = np.broadcast_arrays(X, slice_point_m, Z)
+    XYZ = np.stack((X.flatten(), Y.flatten(), Z.flatten())).transpose().astype(np.float32)
+
+    # Interpolate the ESDF at slice locations
+    slice, flags = nvblox.interpolate_on_cpu(XYZ, esdf_layer)
+
+    # Convert the mesh to Open3D and visualize
+    mesh = open3d_conversions.mesh_layer_to_open3d(mesh_layer)
+    mesh.compute_vertex_normals()
+    slice_mesh = get_colored_cubes_mesh(
+        slice[flags], XYZ[flags], cube_size=3.0 * resolution_m / 4.0)
+    vis = o3d.visualization.Visualizer()
+    vis.create_window()
+    vis.add_geometry(mesh)
+    vis.add_geometry(open3d_conversions.aabb_to_open3d(aabb))
+    vis.add_geometry(slice_mesh)
+    vis.run()
+    vis.destroy_window()
+
+    # Write the mesh to file
+    mesh_path = os.path.join(script_dir, "output", dataset_name + ".ply")
+    print("Writing the mesh to: " + mesh_path)
+    nvblox.io.output_mesh_layer_to_ply(mesh_layer, mesh_path)
+
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(description="Run an experiment.")
+    parser.add_argument("--number_of_frames", metavar="number_of_frames", type=int,
+                        help="Number of frames to process. Omitting this arg means all frames are processed.")
+    args = parser.parse_args()
+    main(args.number_of_frames)
diff --git a/nvblox/examples/python/data/.gitignore b/nvblox/examples/python/data/.gitignore
new file mode 100644
index 00000000..86d0cb27
--- /dev/null
+++ b/nvblox/examples/python/data/.gitignore
@@ -0,0 +1,4 @@
+# Ignore everything in this directory
+*
+# Except this file
+!.gitignore
\ No newline at end of file
diff --git a/nvblox/examples/python/output/.gitignore b/nvblox/examples/python/output/.gitignore
new file mode 100644
index 00000000..86d0cb27
--- /dev/null
+++ b/nvblox/examples/python/output/.gitignore
@@ -0,0 +1,4 @@
+# Ignore everything in this directory
+*
+# Except this file
+!.gitignore
\ No newline at end of file
diff --git a/nvblox/experiments/.gitignore b/nvblox/experiments/.gitignore
new file mode 100644
index 00000000..f32081e2
--- /dev/null
+++ b/nvblox/experiments/.gitignore
@@ -0,0 +1 @@
+*output*
\ No newline at end of file
diff --git a/nvblox/experiments/CMakeLists.txt b/nvblox/experiments/CMakeLists.txt
new file mode 100644
index 00000000..0e9e2f18
--- /dev/null
+++ b/nvblox/experiments/CMakeLists.txt
@@ -0,0 +1,47 @@
+# Common experiments library
+add_library(nvblox_experiments_common SHARED
+  src/common/fuse_3dmatch.cpp
+  src/integrators/cuda/depth_frame_texture.cu
+  src/integrators/cuda/experimental_integrator_input_frames.cu
+  src/integrators/cuda/experimental_projective_tsdf_integrators.cu
+)
+target_include_directories(nvblox_experiments_common PUBLIC
+  include
+)
+target_link_libraries(nvblox_experiments_common nvblox_lib)
+set_target_properties(nvblox_experiments_common PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
+# Macro for symlinking python scripts into the buildspace
+macro(makeLink src dest target)
+  add_custom_command(TARGET ${target} PRE_BUILD
+    COMMAND ln -sf ${src} ${dest}
+    COMMENT "mklink ${src} -> ${dest}")
+endmacro()
+
+# 3DMatch executable
+add_executable(fuse_3dmatch src/fuse_3dmatch.cpp)
+target_link_libraries(fuse_3dmatch
+  nvblox_experiments_common
+)
+set_target_properties(fuse_3dmatch PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
+# Python module for experiments
+set(PYTHON_EXPERIMENTS_MODULE_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/python")
+set(PYTHON_EXPERIMENTS_MODULE_DST_DIR "${CMAKE_CURRENT_BINARY_DIR}/python")
+file(MAKE_DIRECTORY ${PYTHON_EXPERIMENTS_MODULE_DST_DIR})
+file(COPY ${PYTHON_EXPERIMENTS_MODULE_SRC_DIR}/
+     DESTINATION ${PYTHON_EXPERIMENTS_MODULE_DST_DIR})
+
+# Script for comparing branches
+add_subdirectory(experiments/compare_branches)
+
+# Experiments
+# NOTE(alexmillane): Experiments disabled with "EXCLUDE_FROM_ALL" are no longer
+# maintained/built but are kept around for posterity
+add_subdirectory(experiments/texture_vs_global_memory_interpolation EXCLUDE_FROM_ALL)
+add_subdirectory(experiments/unified_vs_device_memory)
+add_subdirectory(experiments/threaded_image_loading)
+add_subdirectory(experiments/realistic_timings)
+add_subdirectory(experiments/vector_copies)
+add_subdirectory(experiments/isolate_tsdf_block_update)
+add_subdirectory(experiments/stream_compaction)
+add_subdirectory(experiments/layer_cake_interface)
diff --git a/nvblox/experiments/experiments/compare_branches/CMakeLists.txt b/nvblox/experiments/experiments/compare_branches/CMakeLists.txt
new file mode 100644
index 00000000..1de54cce
--- /dev/null
+++ b/nvblox/experiments/experiments/compare_branches/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_custom_target( compare_branches )
+makeLink("${CMAKE_CURRENT_SOURCE_DIR}/compare_branches.py" "${CMAKE_CURRENT_BINARY_DIR}/" compare_branches)
+makeLink("${CMAKE_CURRENT_SOURCE_DIR}/plot_comparison.py" "${CMAKE_CURRENT_BINARY_DIR}/" compare_branches)
diff --git a/nvblox/experiments/experiments/compare_branches/__init__.py b/nvblox/experiments/experiments/compare_branches/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/nvblox/experiments/experiments/compare_branches/compare_branches.py b/nvblox/experiments/experiments/compare_branches/compare_branches.py
new file mode 100755
index 00000000..eba5de7c
--- /dev/null
+++ b/nvblox/experiments/experiments/compare_branches/compare_branches.py
@@ -0,0 +1,87 @@
+#!/usr/bin/python3
+
+import os
+import argparse
+import subprocess
+from datetime import datetime
+
+import git
+
+import nvblox_experiments.threedmatch as threedmatch
+
+from plot_comparison import plot_timings
+
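+
+# Example invocation (paths and branch names are illustrative):
+#   ./compare_branches.py ~/datasets/3dmatch/sun3d-mit_76_studyroom-76-1studyroom2 other-branch --num_runs 5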
+
+def generate_timings(dataset_path: str, other_branch_or_hash: str, num_runs: int) -> None:
+
+    # Get this repo
+    # Note(alexmillane): We assume that this script is being executed within the repo under test.
+    this_directory = os.getcwd()
+    repo = git.Repo(this_directory, search_parent_directories=True)
+    git_root_dir = repo.git.rev_parse("--show-toplevel")
+    assert not repo.bare
+
+    # Branches to time
+    current_branch_name = repo.active_branch.name
+    branch_names = [current_branch_name, other_branch_or_hash]
+
+    # Where to place timings
+    datetime_str = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
+    output_root = os.path.join(
+        this_directory, 'output', datetime_str)
+
+    # Timing each branch
+    build_dir = os.path.join(git_root_dir, 'nvblox/build')
+    for branch_name in branch_names:
+
+        # Checkout
+        print('Checking out: ' + branch_name)
+        try:
+            repo.git.checkout(branch_name)
+        except git.GitCommandError:
+            print("Could not checkout branch: " + branch_name +
+                  ". Maybe you have uncommitted changes?")
+            return
+
+        # Build
+        if not os.path.exists(build_dir):
+            print("Please create a build space at: " + build_dir)
+            return
+        build_command = f"cd {build_dir} && cmake .. && make -j16"
+        subprocess.call(build_command, shell=True)
+
+        # Benchmark
+        branch_str = branch_name.replace('/', '_')
+        output_dir = os.path.join(output_root, branch_str)
+        if not os.path.exists(output_dir):
+            os.makedirs(output_dir, exist_ok=True)
+        threedmatch_binary_path = os.path.join(
+            build_dir, 'experiments/fuse_3dmatch')
+        threedmatch.run_multiple(num_runs, threedmatch_binary_path, dataset_path,
+                                 output_dir)
+
+    # Reset to the original branch
+    print('Checking out: ' + current_branch_name)
+    try:
+        repo.git.checkout(current_branch_name)
+    except git.GitCommandError:
+        pass
+
+    # Plot this stuff
+    plot_timings(output_root)
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(
+        description="Generate timing results comparing the current and another branch/hash.")
+    parser.add_argument("dataset_base_path", metavar="dataset_base_path", type=str,
+                        help="Path to the 3DMatch dataset root directory.")
+    parser.add_argument("other_branch_or_hash", metavar="other_branch_or_hash", type=str,
+                        help="The branch name or commit hash to compare against.")
+    parser.add_argument("--num_runs", metavar="num_runs", type=int, default=5,
+                        help="The number of experiments over which to average the timings.")
+
+    args = parser.parse_args()
+    generate_timings(args.dataset_base_path,
+                     args.other_branch_or_hash, args.num_runs)
diff --git a/nvblox/experiments/experiments/compare_branches/plot_comparison.py b/nvblox/experiments/experiments/compare_branches/plot_comparison.py
new file mode 100644
index 00000000..02a38731
--- /dev/null
+++ b/nvblox/experiments/experiments/compare_branches/plot_comparison.py
@@ -0,0 +1,119 @@
+#!/usr/bin/python3
+
+import os
+import argparse
+import glob
+import pandas as pd
+from typing import Tuple
+
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib import rcParams
+
+from nvblox_experiments.timing import get_timings_as_dataframe
+
+
+rcParams.update({'figure.autolayout': True})
+
+
+# Get the timings for a single run
+def get_total_times(filepath: str) -> pd.Series:
+    timings = get_timings_as_dataframe(filepath)
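+    # Filter out the dataset-loading timers (3dmatch*, file_loading*, *stbi)
+    # as well as the esdf timers; the remaining rows are what get compared
+    # across branches.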
+    this_directory = os.getcwd()
+    repo = git.Repo(this_directory, search_parent_directories=True)
+    git_root_dir = repo.git.rev_parse("--show-toplevel")
+    assert not repo.bare
+
+    # Branches to time
+    current_branch_name = repo.active_branch.name
+    branch_names = [current_branch_name, other_branch_or_hash]
+
+    # Where to place timings
+    datetime_str = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
+    output_root = os.path.join(
+        this_directory, 'output', datetime_str)
+
+    # Timing each branch
+    build_dir = os.path.join(git_root_dir, 'nvblox/build')
+    for branch_name in branch_names:
+
+        # Checkout
+        print('Checking out: ' + branch_name)
+        try:
+            repo.git.checkout(branch_name)
+        except git.GitCommandError:
+            print("Could not checkout branch: " + branch_name +
+                  ". Maybe you have uncommitted changes?")
+            return
+
+        # Build
+        if not os.path.exists(build_dir):
+            print("Please create a build space at: " + build_dir)
+            return
+        build_command = f"cd {build_dir} && cmake .. && make -j16"
+        subprocess.call(build_command, shell=True)
+
+        # Benchmark
+        branch_str = branch_name.replace('/', '_')
+        output_dir = os.path.join(output_root, branch_str)
+        if not os.path.exists(output_dir):
+            os.makedirs(output_dir, exist_ok=True)
+        threedmatch_binary_path = os.path.join(
+            build_dir, 'experiments/fuse_3dmatch')
+        threedmatch.run_multiple(num_runs, threedmatch_binary_path, dataset_path,
+                                 output_dir)
+
+    # Reset to the original branch
+    print('Checking out: ' + current_branch_name)
+    try:
+        repo.git.checkout(current_branch_name)
+    except git.GitCommandError:
+        print("Could not check out the original branch: " + current_branch_name)
+
+    # Plot the results
+    plot_timings(output_root)
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(
+        description="Generate timing results comparing the current and another branch/hash.")
+    parser.add_argument("dataset_base_path", metavar="dataset_base_path", type=str,
+                        help="Path to the 3DMatch dataset root directory.")
+    parser.add_argument("other_branch_or_hash", metavar="other_branch_or_hash", type=str,
+                        help="The branch name or commit hash to compare against.")
+    parser.add_argument("--num_runs", metavar="num_runs", type=int, default=5,
+                        help="The number of experiments over which to average the timings.")
+
+    args = parser.parse_args()
+    generate_timings(args.dataset_base_path,
+                     args.other_branch_or_hash, args.num_runs) diff --git a/nvblox/experiments/experiments/compare_branches/plot_comparison.py b/nvblox/experiments/experiments/compare_branches/plot_comparison.py new file mode 100644 index 00000000..02a38731 --- /dev/null +++ b/nvblox/experiments/experiments/compare_branches/plot_comparison.py @@ -0,0 +1,119 @@
+#!/usr/bin/python3
+
+import os
+import argparse
+import glob
+import pandas as pd
+from typing import Tuple
+
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib import rcParams
+
+from nvblox_experiments.timing import get_timings_as_dataframe
+
+
+rcParams.update({'figure.autolayout': True})
+
+
+# Get the timings for a single run
+def get_total_times(filepath: str) -> pd.Series:
+    timings = get_timings_as_dataframe(filepath)
+    timings = timings[np.logical_not(timings.index.str.startswith('3dmatch'))]
+    timings = timings[np.logical_not(
+        timings.index.str.startswith('file_loading'))]
+    timings = timings[np.logical_not(
+        timings.index.str.endswith('stbi'))]
+    timings = timings[np.logical_not(timings.index.str.startswith('esdf'))]
+    return timings["total_time"]
+
+
+# Get timings for multiple runs and average them
+def get_branch_mean_total_times(timing_dir: str) -> Tuple[pd.Series,
np.ndarray]:
+    series = []
+    results_files = glob.glob(timing_dir + "/run_*.txt")
+    for f in results_files:
+        series.append(get_total_times(f))
+    total_times = pd.concat(series, axis=1)
+    total_times_mean = total_times.mean(axis=1)
+    total_times_max = total_times.max(axis=1)
+    total_times_min = total_times.min(axis=1)
+    total_times_max_min_np = np.vstack((total_times_max, total_times_min))
+    total_times_err = np.abs(total_times_max_min_np -
+                             total_times_mean.to_numpy())
+    return total_times_mean, total_times_err
+
+
+def plot_timings(timing_root_dir: str, save_fig: bool = True) -> None:
+    print("Looking for results in: " + timing_root_dir)
+
+    # Check that the branch results are in the dir
+    result_dirs = [name for name in os.listdir(timing_root_dir)
+                   if os.path.isdir(os.path.join(timing_root_dir, name))]
+    assert len(
+        result_dirs) == 2, "We expect a comparison between exactly two branches"
+
+    # Branch 1 results
+    branch_1_name = result_dirs[0]
+    branch_1_results_folder = os.path.join(timing_root_dir, branch_1_name)
+    branch_1_mean, branch_1_err = get_branch_mean_total_times(
+        branch_1_results_folder)
+
+    # Branch 2 results
+    branch_2_name = result_dirs[1]
+    branch_2_results_folder = os.path.join(timing_root_dir, branch_2_name)
+    branch_2_mean, branch_2_err = get_branch_mean_total_times(
+        branch_2_results_folder)
+
+    # Select only the intersection of the two series.
+    intersection_columns = branch_1_mean.index.intersection(
+        branch_2_mean.index)
+
+    branch_1_inds = [branch_1_mean.index.get_loc(
+        c) for c in intersection_columns]
+    branch_2_inds = [branch_2_mean.index.get_loc(
+        c) for c in intersection_columns]
+
+    branch_1_mean_filtered = branch_1_mean[intersection_columns]
+    branch_2_mean_filtered = branch_2_mean[intersection_columns]
+    branch_1_err_filtered = branch_1_err[:, branch_1_inds]
+    branch_2_err_filtered = branch_2_err[:, branch_2_inds]
+
+    assert len(branch_2_mean_filtered) == len(branch_1_mean_filtered)
+
+    # Plot
+    fig, ax = plt.subplots()
+    bar_width = 0.35
+    x = np.arange(len(branch_1_mean_filtered))
+    x_device = x - bar_width / 2.0
+    x_unified = x + bar_width / 2.0
+    bar1 = ax.bar(x_device, branch_1_mean_filtered, bar_width,
+                  yerr=branch_1_err_filtered, label=branch_1_name)
+    bar2 = ax.bar(x_unified, branch_2_mean_filtered, bar_width,
+                  yerr=branch_2_err_filtered, label=branch_2_name)
+    try:
+        ax.bar_label(bar1, fmt='%.2f')
+        ax.bar_label(bar2, fmt='%.2f')
+    except AttributeError:
+        # Axes.bar_label is only available in matplotlib >= 3.4.
+        pass
+    ax.set_xticks(x)
+    ax.set_xticklabels(intersection_columns, rotation='vertical')
+    ax.legend()
+
+    # Save the plot in the root folder of the timings.
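+    # (Note: the figure is saved before plt.show() below; show() blocks until
+    # the window is closed, and saving afterwards may write an empty canvas.)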
+ if save_fig: + image_path = os.path.join(timing_root_dir, 'timings.png') + print("Saving figure to disk as: " + image_path) + plt.savefig(image_path) + + plt.show() + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser( + description="Plot branch timing comparisons.") + parser.add_argument("timing_root_dir", metavar="timing_root_dir", type=str, + help="The directory containing the timing results.") + args = parser.parse_args() + plot_timings(args.timing_root_dir) diff --git a/nvblox/experiments/experiments/isolate_tsdf_block_update/CMakeLists.txt b/nvblox/experiments/experiments/isolate_tsdf_block_update/CMakeLists.txt new file mode 100644 index 00000000..cd564351 --- /dev/null +++ b/nvblox/experiments/experiments/isolate_tsdf_block_update/CMakeLists.txt @@ -0,0 +1,2 @@ +add_executable(isolate_tsdf_block_update main.cpp) +target_link_libraries(isolate_tsdf_block_update nvblox_lib) diff --git a/nvblox/experiments/experiments/isolate_tsdf_block_update/main.cpp b/nvblox/experiments/experiments/isolate_tsdf_block_update/main.cpp new file mode 100644 index 00000000..4903c3a8 --- /dev/null +++ b/nvblox/experiments/experiments/isolate_tsdf_block_update/main.cpp @@ -0,0 +1,118 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+#include <iostream>
+#include <string>
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+
+#include "nvblox/core/camera.h"
+#include "nvblox/core/common_names.h"
+#include "nvblox/core/cuda/warmup.h"
+#include "nvblox/core/types.h"
+#include "nvblox/datasets/parse_3dmatch.h"
+#include "nvblox/integrators/integrators_common.h"
+#include "nvblox/integrators/projective_tsdf_integrator.h"
+#include "nvblox/utils/timing.h"
+
+DECLARE_bool(alsologtostderr);
+
+using namespace nvblox;
+
+// Just a class so we can access integrator internals
+class ProjectiveTsdfIntegratorExperiment : public ProjectiveTsdfIntegrator {
+ public:
+  ProjectiveTsdfIntegratorExperiment() : ProjectiveTsdfIntegrator() {}
+  virtual ~ProjectiveTsdfIntegratorExperiment(){};
+
+  // Expose this publicly
+  void updateBlocks(const std::vector<Index3D>& block_indices,
+                    const DepthImage& depth_frame, const Transform& T_L_C,
+                    const Camera& camera, const float truncation_distance_m,
+                    TsdfLayer* layer) {
+    ProjectiveTsdfIntegrator::updateBlocks(block_indices, depth_frame, T_L_C,
+                                           camera, truncation_distance_m,
+                                           layer);
+  }
+};
+
+int main(int argc, char* argv[]) {
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+  FLAGS_alsologtostderr = true;
+  google::InstallFailureSignalHandler();
+
+  const std::string dataset_base_path = "../../../tests/data/3dmatch";
+  constexpr int kSeqNum = 1;
+
+  constexpr float kVoxelSize = 0.05;
+  TsdfLayer tsdf_layer(kVoxelSize, MemoryType::kDevice);
+
+  ProjectiveTsdfIntegratorExperiment tsdf_integrator;
+
+  const unsigned int frustum_raycast_subsampling_rate = 4;
+  tsdf_integrator.frustum_calculator().raycast_subsampling_factor(
+      frustum_raycast_subsampling_rate);
+
+  // Update identified blocks (many times)
+  constexpr int kNumIntegrations = 1000;
+  for (int i = 0; i < kNumIntegrations; i++) {
+    // Load images
+    auto image_loader_ptr = datasets::threedmatch::createDepthImageLoader(
+        dataset_base_path, kSeqNum);
+
+    DepthImage depth_frame;
+    CHECK(image_loader_ptr->getNextImage(&depth_frame));
+
+    Eigen::Matrix3f camera_intrinsics;
+    CHECK(datasets::threedmatch::parseCameraFromFile(
+        datasets::threedmatch::getPathForCameraIntrinsics(dataset_base_path),
+        &camera_intrinsics));
+    const auto camera = Camera::fromIntrinsicsMatrix(
+        camera_intrinsics, depth_frame.width(), depth_frame.height());
+
+    Transform T_L_C;
+    CHECK(datasets::threedmatch::parsePoseFromFile(
+        datasets::threedmatch::getPathForFramePose(dataset_base_path, kSeqNum,
+                                                   0),
+        &T_L_C));
+
+    // Identify blocks we can (potentially) see (CPU)
+    timing::Timer blocks_in_view_timer("tsdf/integrate/get_blocks_in_view");
+    const std::vector<Index3D> block_indices =
+        tsdf_integrator.getBlocksInViewUsingRaycasting(
+            depth_frame, T_L_C, camera, tsdf_layer.block_size());
+    blocks_in_view_timer.Stop();
+
+    // Allocate blocks (CPU)
+    timing::Timer allocate_blocks_timer("tsdf/integrate/allocate_blocks");
+    allocateBlocksWhereRequired(block_indices, &tsdf_layer);
+    allocate_blocks_timer.Stop();
+
+    timing::Timer update_blocks_timer("tsdf/integrate/update_blocks");
+    const float truncation_distance_m =
+        tsdf_integrator.truncation_distance_vox() * kVoxelSize;
+    tsdf_integrator.updateBlocks(block_indices, depth_frame, T_L_C, camera,
+                                 truncation_distance_m, &tsdf_layer);
+    update_blocks_timer.Stop();
+
+    // Reset the layer such that we do TsdfBlock allocation.
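+    // (With clear() left commented out below, blocks are only allocated on
+    // the first pass, so the update_blocks timer isolates the update itself;
+    // uncomment it to also re-measure allocation on every iteration.)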
+    // tsdf_layer.clear();
+  }
+
+  std::cout << timing::Timing::Print() << std::endl;
+
+  return 0;
+} \ No newline at end of file diff --git a/nvblox/experiments/experiments/layer_cake_interface/CMakeLists.txt b/nvblox/experiments/experiments/layer_cake_interface/CMakeLists.txt new file mode 100644 index 00000000..b51552ba --- /dev/null +++ b/nvblox/experiments/experiments/layer_cake_interface/CMakeLists.txt @@ -0,0 +1,25 @@
+add_executable(layer_cake_interface_dynamic
+    main_dynamic.cpp
+    src/cuda/user_defined_block.cu
+)
+target_include_directories(layer_cake_interface_dynamic PUBLIC
+    include
+)
+target_link_libraries(layer_cake_interface_dynamic nvblox_lib)
+
+add_executable(layer_cake_interface_static
+    main_static.cpp
+)
+target_include_directories(layer_cake_interface_static PUBLIC
+    include
+)
+target_link_libraries(layer_cake_interface_static nvblox_lib)
+
+add_executable(layer_cake_interface_type_erasure
+    main_type_erasure.cpp
+)
+target_include_directories(layer_cake_interface_type_erasure PUBLIC
+    include
+)
+target_link_libraries(layer_cake_interface_type_erasure nvblox_lib)
+ diff --git a/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/cake_common.h b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/cake_common.h new file mode 100644 index 00000000..bc16d20a --- /dev/null +++ b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/cake_common.h @@ -0,0 +1,32 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+namespace nvblox {
+namespace experiments {
+
+// Unique types
+template <typename... Ts>
+struct unique_types;
+
+// Count occurrences
+template <typename TargetType, typename... Ts>
+struct count_type_occurrence;
+
+}  // namespace experiments
+}  // namespace nvblox
+
+#include "nvblox/experiments/impl/cake_common_impl.h" diff --git a/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/cake_dynamic.h b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/cake_dynamic.h new file mode 100644 index 00000000..d9a941ed --- /dev/null +++ b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/cake_dynamic.h @@ -0,0 +1,61 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <memory>
+#include <typeindex>
+#include <unordered_map>
+
+#include <glog/logging.h>
+
+#include "nvblox/core/layer.h"
+
+#include "nvblox/experiments/cake_common.h"
+
+namespace nvblox {
+namespace experiments {
+
+class LayerCakeDynamic {
+ public:
+  LayerCakeDynamic(float voxel_size, MemoryType memory_type)
+      : voxel_size_(voxel_size), memory_type_(memory_type) {}
+
+  template <typename LayerType>
+  LayerType* add();
+
+  template <typename LayerType>
+  LayerType* getPtr();
+
+  template <typename LayerType>
+  bool exists() const;
+
+  // Factory accepting a list of LayerTypes
+  template <typename... LayerTypes>
+  static LayerCakeDynamic create(float voxel_size, MemoryType memory_type);
+
+ private:
+  // Params
+  const float voxel_size_;
+  const MemoryType memory_type_;
+
+  // NOTE(alexmillane): Could move to multimap if we want more than one layer
+  // with the same type
+  std::unordered_map<std::type_index, std::unique_ptr<BaseLayer>> layers_;
+};
+
+}  // namespace experiments
+}  // namespace nvblox
+
+#include "nvblox/experiments/impl/cake_dynamic_impl.h" diff --git a/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/cake_static.h b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/cake_static.h new file mode 100644 index 00000000..5b2e9105 --- /dev/null +++ b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/cake_static.h @@ -0,0 +1,47 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+namespace nvblox {
+namespace experiments {
+
+template <typename... LayerTypes>
+class LayerCakeStatic {
+ public:
+  LayerCakeStatic(float voxel_size, MemoryType memory_type)
+      : layers_(std::move(
+            LayerTypes(sizeArgumentFromVoxelSize<LayerTypes>(voxel_size),
+                       memory_type))...) {}
+
+  // Access by LayerType.
+  // This requires that the list of types contained in this LayerCakeStatic are
+  // unique with respect to one another.
+  template <typename LayerType>
+  LayerType* getPtr();
+  template <typename LayerType>
+  const LayerType& get() const;
+
+  template <typename LayerType>
+  int count() const;
+
+ private:
+  std::tuple<LayerTypes...> layers_;
+};
+
+}  // namespace experiments
+}  // namespace nvblox
+
+#include "nvblox/experiments/impl/cake_static_impl.h" diff --git a/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/cake_type_erasure.h b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/cake_type_erasure.h new file mode 100644 index 00000000..550eb062 --- /dev/null +++ b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/cake_type_erasure.h @@ -0,0 +1,95 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "nvblox/core/layer.h"
+#include "nvblox/core/types.h"
+
+namespace nvblox {
+namespace experiments {
+
+// Note: A good resource for type erasure can be found here:
+// https://github.com/alecjacobson/better-code-runtime-polymorphism/blob/master/library.hpp
+
+template <typename LayerType>
+bool loadLayer(const std::string& filename, LayerType* layer_ptr) {
+  LOG(INFO) << "Loading Layer of type: " << typeid(*layer_ptr).name();
+  return true;
+}
+
+class LayerInterface {
+ public:
+  template <typename LayerType>
+  LayerInterface(LayerType&& layer)
+      : layer_(std::make_unique<LayerModel<LayerType>>(std::move(layer))) {}
+
+  bool load(const std::string& filename) { return layer_->load(filename); }
+
+ private:
+  struct LayerConcept {
+    virtual ~LayerConcept() = default;
+    virtual bool load(const std::string& filename) = 0;
+  };
+
+  template <typename LayerType>
+  struct LayerModel : LayerConcept {
+    LayerModel(LayerType&& layer) : layer_(std::move(layer)) {}
+
+    bool load(const std::string& filename) override {
+      return loadLayer(filename, &layer_);
+    }
+
+    // Where the layer is actually stored
+    LayerType layer_;
+  };
+
+  std::unique_ptr<LayerConcept> layer_;
+};
+
+class LayerCakeTypeErasure {
+ public:
+  LayerCakeTypeErasure(float voxel_size, MemoryType memory_type)
+      : voxel_size_(voxel_size), memory_type_(memory_type){};
+
+  template <typename LayerType>
+  void add() {
+    layers_.push_back(LayerType(
+        sizeArgumentFromVoxelSize<LayerType>(voxel_size_), memory_type_));
+  }
+
+  bool load(const std::string& filename) {
+    bool success = true;
+    for (auto& layer : layers_) {
+      success &= layer.load(filename);
+    }
+    return success;
+  }
+
+ private:
+  // Params
+  const float voxel_size_;
+  const MemoryType memory_type_;
+
+  // Data
+  std::vector<LayerInterface> layers_;
+};
+
+}  // namespace experiments
+}  // namespace nvblox \ No newline at end of file diff --git a/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/impl/cake_common_impl.h b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/impl/cake_common_impl.h new file mode 100644 index 00000000..9c81fba8 --- /dev/null +++ b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/impl/cake_common_impl.h @@ -0,0 +1,63 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+namespace nvblox {
+namespace experiments {
+
+// Unique type
+template <typename T1, typename T2, typename... Ts>
+struct unique_types<T1, T2, Ts...> {
+  static constexpr bool value = unique_types<T1, T2>::value &&
+                                unique_types<T1, Ts...>::value &&
+                                unique_types<T2, Ts...>::value;
+};
+
+template <typename T1, typename T2>
+struct unique_types<T1, T2> {
+  static constexpr bool value = !std::is_same<T1, T2>::value;
+};
+
+template <typename T1>
+struct unique_types<T1> {
+  static constexpr bool value = true;
+};
+
+template <>
+struct unique_types<> {
+  static constexpr bool value = true;
+};
+
+// Count occurrences
+template <typename TargetType, typename T1, typename... Ts>
+struct count_type_occurrence<TargetType, T1, Ts...> {
+  static constexpr int value = count_type_occurrence<TargetType, T1>::value +
+                               count_type_occurrence<TargetType, Ts...>::value;
+};
+
+template <typename TargetType, typename T1>
+struct count_type_occurrence<TargetType, T1> {
+  static constexpr int value =
+      static_cast<int>(std::is_same<TargetType, T1>::value);
+};
+
+template <typename TargetType>
+struct count_type_occurrence<TargetType> {
+  static constexpr int value = 0;
+};
+
+}  // namespace experiments
+}  // namespace nvblox \ No newline at end of file diff --git a/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/impl/cake_dynamic_impl.h b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/impl/cake_dynamic_impl.h new file mode 100644 index 00000000..c2be15ae --- /dev/null +++ b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/impl/cake_dynamic_impl.h @@ -0,0 +1,73 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+namespace nvblox {
+namespace experiments {
+
+template <typename LayerType>
+LayerType* LayerCakeDynamic::add() {
+  if (layers_.count(typeid(LayerType)) == 0) {
+    // Allocate
+    auto layer_ptr = std::make_unique<LayerType>(
+        sizeArgumentFromVoxelSize<LayerType>(voxel_size_), memory_type_);
+    LayerType* return_ptr = layer_ptr.get();
+    // Store (as BaseLayer ptr)
+    layers_.emplace(std::type_index(typeid(LayerType)), std::move(layer_ptr));
+    LOG(INFO) << "Adding Layer with type: " << typeid(LayerType).name()
+              << " to LayerCake.";
+    return return_ptr;
+  } else {
+    LOG(WARNING) << "Request to add a LayerType that's already in the cake. "
+                    "Currently we only support single layers of each "
+                    "LayerType.
So we did nothing.";
+    return nullptr;
+  }
+}
+
+template <typename LayerType>
+LayerType* LayerCakeDynamic::getPtr() {
+  const auto it = layers_.find(std::type_index(typeid(LayerType)));
+  if (it != layers_.end()) {
+    BaseLayer* base_ptr = it->second.get();
+    LayerType* ptr = dynamic_cast<LayerType*>(base_ptr);
+    CHECK_NOTNULL(ptr);
+    return ptr;
+  } else {
+    LOG(WARNING) << "Request for a LayerType which is not in the cake.";
+    return nullptr;
+  }
+}
+
+template <typename LayerType>
+bool LayerCakeDynamic::exists() const {
+  const auto it = layers_.find(std::type_index(typeid(LayerType)));
+  return (it != layers_.end());
+}
+
+template <typename... LayerTypes>
+LayerCakeDynamic LayerCakeDynamic::create(float voxel_size,
+                                          MemoryType memory_type) {
+  static_assert(unique_types<LayerTypes...>::value,
+                "At the moment we only support LayerCakes containing unique "
+                "LayerTypes.");
+  LayerCakeDynamic cake(voxel_size, memory_type);
+  BaseLayer* ignored[] = {cake.add<LayerTypes>()...};
+  return cake;
+}
+
+}  // namespace experiments
+}  // namespace nvblox diff --git a/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/impl/cake_static_impl.h b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/impl/cake_static_impl.h new file mode 100644 index 00000000..55ef5aa7 --- /dev/null +++ b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/impl/cake_static_impl.h @@ -0,0 +1,52 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include "nvblox/experiments/cake_common.h"
+
+namespace nvblox {
+namespace experiments {
+
+template <typename... LayerTypes>
+template <typename LayerType>
+LayerType* LayerCakeStatic<LayerTypes...>::getPtr() {
+  static_assert(count_type_occurrence<LayerType, LayerTypes...>::value > 0,
+                "LayerCake does not contain requested layer");
+  static_assert(count_type_occurrence<LayerType, LayerTypes...>::value < 2,
+                "To get a Layer by type, the LayerCake must contain "
+                "only a single matching LayerType");
+  return &std::get<LayerType>(layers_);
+}
+
+template <typename... LayerTypes>
+template <typename LayerType>
+const LayerType& LayerCakeStatic<LayerTypes...>::get() const {
+  static_assert(count_type_occurrence<LayerType, LayerTypes...>::value > 0,
+                "LayerCake does not contain requested layer");
+  static_assert(count_type_occurrence<LayerType, LayerTypes...>::value < 2,
+                "To get a Layer by type, the LayerCake must contain "
+                "only a single matching LayerType");
+  return std::get<LayerType>(layers_);
+}
+
+template <typename... LayerTypes>
+template <typename LayerType>
+int LayerCakeStatic<LayerTypes...>::count() const {
+  return count_type_occurrence<LayerType, LayerTypes...>::value;
+}
+
+}  // namespace experiments
+}  // namespace nvblox diff --git a/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/impl/load_impl.h b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/impl/load_impl.h new file mode 100644 index 00000000..8e071e15 --- /dev/null +++ b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/impl/load_impl.h @@ -0,0 +1,60 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+namespace nvblox {
+namespace experiments {
+namespace io {
+
+template <typename LayerType>
+bool loadLayer(const std::string& filename, LayerType* layer) {
+  LOG(INFO) << "Loading layer of type: " << typeid(LayerType).name();
+  // TODO: Actually load something.
+  return true;
+}
+
+template <typename... LayerTypes>
+struct Loader;
+
+template <typename LayerType>
+struct Loader<LayerType> {
+  static bool load(const std::string& filename, LayerCakeDynamic* cake) {
+    if (cake->exists<LayerType>()) {
+      LOG(WARNING) << "Not loading layer. A layer of type: "
+                   << typeid(LayerType).name()
+                   << " already exists in the cake.";
+      return false;
+    }
+    return loadLayer(filename, cake->add<LayerType>());
+  }
+};
+
+template <typename LayerType, typename... LayerTypes>
+struct Loader<LayerType, LayerTypes...> {
+  static bool load(const std::string& filename, LayerCakeDynamic* cake) {
+    return Loader<LayerType>::load(filename, cake) &&
+           Loader<LayerTypes...>::load(filename, cake);
+  }
+};
+
+template <typename... LayerTypes>
+bool load(const std::string& filename, LayerCakeDynamic* cake) {
+  return Loader<LayerTypes...>::load(filename, cake);
+}
+
+}  // namespace io
+}  // namespace experiments
+}  // namespace nvblox diff --git a/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/load.h b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/load.h new file mode 100644 index 00000000..c127d692 --- /dev/null +++ b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/load.h @@ -0,0 +1,36 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <string>
+
+#include "nvblox/experiments/cake_dynamic.h"
+
+namespace nvblox {
+namespace experiments {
+namespace io {
+
+template <typename... LayerTypes>
+bool load(const std::string& filename, LayerCakeDynamic* cake);
+
+template <typename LayerType>
+bool loadLayer(const std::string& filename, LayerType* layer);
+
+}  // namespace io
+}  // namespace experiments
+}  // namespace nvblox
+
+#include "nvblox/experiments/impl/load_impl.h" diff --git a/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/user_defined_block.h b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/user_defined_block.h new file mode 100644 index 00000000..402f06e5 --- /dev/null +++ b/nvblox/experiments/experiments/layer_cake_interface/include/nvblox/experiments/user_defined_block.h @@ -0,0 +1,49 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include "nvblox/core/blox.h"
+#include "nvblox/core/layer.h"
+#include "nvblox/core/unified_ptr.h"
+
+#include "nvblox/experiments/load.h"
+
+namespace nvblox {
+namespace experiments {
+
+struct UserDefinedBlock {
+  typedef nvblox::unified_ptr<UserDefinedBlock> Ptr;
+  typedef nvblox::unified_ptr<const UserDefinedBlock> ConstPtr;
+
+  static Ptr allocate(MemoryType memory_type) {
+    return make_unified<UserDefinedBlock>(memory_type);
+  }
+
+  float data = 0.0f;
+};
+
+using UserDefinedLayer = BlockLayer<UserDefinedBlock>;
+
+// Custom load function for UserDefinedBlock
+namespace io {
+
+template <>
+bool loadLayer(
+    const std::string& filename, experiments::UserDefinedLayer* layer);
+
+}  // namespace io
+}  // namespace experiments
+}  // namespace nvblox \ No newline at end of file diff --git a/nvblox/experiments/experiments/layer_cake_interface/main_dynamic.cpp b/nvblox/experiments/experiments/layer_cake_interface/main_dynamic.cpp new file mode 100644 index 00000000..61aaf182 --- /dev/null +++ b/nvblox/experiments/experiments/layer_cake_interface/main_dynamic.cpp @@ -0,0 +1,95 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <iostream>
+
+#include <glog/logging.h>
+
+#include "nvblox/core/common_names.h"
+
+#include "nvblox/experiments/cake_dynamic.h"
+#include "nvblox/experiments/load.h"
+#include "nvblox/experiments/user_defined_block.h"
+
+using namespace nvblox;
+
+// Globals for convenience
+const float voxel_size = 0.1f;
+const MemoryType memory_type = MemoryType::kDevice;
+
+void addRetrieve() {
+  std::cout << "addRetrieve()" << std::endl;
+
+  experiments::LayerCakeDynamic cake(voxel_size, memory_type);
+
+  cake.add<TsdfLayer>();
+  TsdfLayer* tsdf_layer = cake.getPtr<TsdfLayer>();
+
+  std::cout << "\n\n" << std::endl;
+}
+
+void loadLayers() {
+  std::cout << "loadLayers()." << std::endl;
+
+  experiments::LayerCakeDynamic cake(voxel_size, memory_type);
+
+  std::string filename = "test.cake";
+  experiments::io::load(filename, &cake);
+
+  std::cout << "\n\n" << std::endl;
+}
+
+void loadCustomLayer() {
+  std::cout << "loadCustomLayer()." << std::endl;
+
+  const float block_size = 1.0f;
+  experiments::UserDefinedLayer user_defined_layer(block_size, memory_type);
+
+  experiments::LayerCakeDynamic cake(voxel_size, memory_type);
+
+  std::string filename = "test.cake";
+  experiments::io::load(filename, &cake);
+
+  std::cout << "\n\n" << std::endl;
+}
+
+void buildCake() {
+  std::cout << "buildCake()" << std::endl;
+
+  experiments::LayerCakeDynamic cake =
+      experiments::LayerCakeDynamic::create(voxel_size, memory_type);
+
+  // Static assert
+  // NOTE: This creates a static assertion failure due to duplicated LayerTypes.
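+  // (The guard is the static_assert on unique_types<LayerTypes...> inside
+  // create(), which rejects duplicate LayerTypes at compile time, e.g.:)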
+  // LayerCakeDynamic cake_error =
+  //     LayerCakeDynamic::create<TsdfLayer, TsdfLayer>(voxel_size,
+  //                                                    memory_type);
+
+  std::cout << "\n\n" << std::endl;
+}
+
+int main(int argc, char** argv) {
+  FLAGS_alsologtostderr = true;
+  google::InitGoogleLogging(argv[0]);
+  google::InstallFailureSignalHandler();
+
+  addRetrieve();
+  loadLayers();
+  loadCustomLayer();
+  buildCake();
+
+  return 0;
+} diff --git a/nvblox/experiments/experiments/layer_cake_interface/main_static.cpp b/nvblox/experiments/experiments/layer_cake_interface/main_static.cpp new file mode 100644 index 00000000..82cdd268 --- /dev/null +++ b/nvblox/experiments/experiments/layer_cake_interface/main_static.cpp @@ -0,0 +1,62 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <iostream>
+
+#include <glog/logging.h>
+
+#include "nvblox/core/common_names.h"
+
+#include "nvblox/experiments/cake_static.h"
+
+using namespace nvblox;
+
+// Globals for convenience
+const float voxel_size = 0.1f;
+const MemoryType memory_type = MemoryType::kDevice;
+
+void addRetrieve() {
+  std::cout << "addRetrieve()" << std::endl;
+
+  experiments::LayerCakeStatic<TsdfLayer> cake_1(voxel_size, memory_type);
+
+  TsdfLayer* tsdf_layer = cake_1.getPtr<TsdfLayer>();
+  // EsdfLayer* esdf_layer = cake_1.getPtr<EsdfLayer>();  // Compile error (missing
+  //                                                      // layer)
+
+  experiments::LayerCakeStatic<TsdfLayer, EsdfLayer>
+      cake_2(voxel_size, memory_type);
+
+  tsdf_layer = cake_2.getPtr<TsdfLayer>();
+  EsdfLayer* esdf_layer = cake_2.getPtr<EsdfLayer>();
+
+  experiments::LayerCakeStatic<TsdfLayer, TsdfLayer, TsdfLayer>
+      cake_3(voxel_size, memory_type);
+  CHECK_EQ(cake_3.count<TsdfLayer>(), 3);
+  // tsdf_layer = cake_3.getPtr<TsdfLayer>();  // Compile error (more than one
+  //                                           // TsdfLayer)
+
+  std::cout << "\n\n" << std::endl;
+}
+
+int main(int argc, char** argv) {
+  FLAGS_alsologtostderr = true;
+  google::InitGoogleLogging(argv[0]);
+  google::InstallFailureSignalHandler();
+
+  addRetrieve();
+
+  return 0;
+} diff --git a/nvblox/experiments/experiments/layer_cake_interface/main_type_erasure.cpp b/nvblox/experiments/experiments/layer_cake_interface/main_type_erasure.cpp new file mode 100644 index 00000000..95e630a4 --- /dev/null +++ b/nvblox/experiments/experiments/layer_cake_interface/main_type_erasure.cpp @@ -0,0 +1,43 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "nvblox/core/common_names.h"
+#include "nvblox/core/layer.h"
+
+#include "nvblox/experiments/cake_type_erasure.h"
+
+using namespace nvblox;
+
+void load() {
+  const float voxel_size = 0.1f;
+  const MemoryType memory_type = MemoryType::kDevice;
+  experiments::LayerCakeTypeErasure cake(voxel_size, memory_type);
+
+  // Add layers
+  cake.add<TsdfLayer>();
+  cake.add<ColorLayer>();
+  cake.add<EsdfLayer>();
+  cake.add<MeshLayer>();
+
+  // Populate layer(s) from file
+  const std::string filename = "test.cake";
+  cake.load(filename);
+}
+
+int main() {
+  load();
+
+  return 0;
+} \ No newline at end of file diff --git a/nvblox/experiments/experiments/layer_cake_interface/src/cuda/user_defined_block.cu b/nvblox/experiments/experiments/layer_cake_interface/src/cuda/user_defined_block.cu new file mode 100644 index 00000000..e1b57f59 --- /dev/null +++ b/nvblox/experiments/experiments/layer_cake_interface/src/cuda/user_defined_block.cu @@ -0,0 +1,34 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "nvblox/experiments/user_defined_block.h"
+
+#include "nvblox/gpu_hash/cuda/impl/gpu_layer_view_impl.cuh"
+
+namespace nvblox {
+
+template class GPULayerView<UserDefinedBlock>;
+
+namespace experiments {
+namespace io {
+template <>
+bool loadLayer(
+    const std::string& filename, experiments::UserDefinedLayer* layer) {
+  LOG(INFO) << "In the user defined load function for: UserDefinedLayer";
+  return true;
+}
+}  // namespace io
+}  // namespace experiments
+}  // namespace nvblox \ No newline at end of file diff --git a/nvblox/experiments/experiments/realistic_timings/CMakeLists.txt b/nvblox/experiments/experiments/realistic_timings/CMakeLists.txt new file mode 100644 index 00000000..6a590ecb --- /dev/null +++ b/nvblox/experiments/experiments/realistic_timings/CMakeLists.txt @@ -0,0 +1,4 @@
+add_custom_target( realistic_timings )
+makeLink("${CMAKE_CURRENT_SOURCE_DIR}/gather_timings.py" "${CMAKE_CURRENT_BINARY_DIR}/" realistic_timings)
+makeLink("${CMAKE_CURRENT_SOURCE_DIR}/plot_timings.py" "${CMAKE_CURRENT_BINARY_DIR}/" realistic_timings)
+makeLink("${CMAKE_CURRENT_SOURCE_DIR}/compare_platforms.py" "${CMAKE_CURRENT_BINARY_DIR}/" realistic_timings) diff --git a/nvblox/experiments/experiments/realistic_timings/compare_platforms.py b/nvblox/experiments/experiments/realistic_timings/compare_platforms.py new file mode 100644 index 00000000..bac01c4f --- /dev/null +++ b/nvblox/experiments/experiments/realistic_timings/compare_platforms.py @@ -0,0 +1,72 @@
+#!/usr/bin/python3
+
+from nvblox.experiments.timing import get_timings_as_dataframe
+import os
+import argparse
+import glob
+import pandas as pd
+from typing import Tuple
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib import rcParams
+rcParams.update({'figure.autolayout': True})
+
+
+frame_rate = 30
+
+
+def get_total_times(filepath: str) -> pd.Series:
+    timings = get_timings_as_dataframe(filepath)
+    timings = timings[timings.index.str.startswith('3dmatch')]
+    def mapper(x): return x.replace("3dmatch/", "").replace("_", " ")
+    timings = timings.rename(mapper, axis='index')
+
total_frames = timings["num_calls"].max() + total_seconds = total_frames / frame_rate + return timings["total_time"].divide(total_seconds) + + +def plot_timings(timing_root_dir: str, save_fig: bool = True) -> None: + print("Looking for results in: " + timing_root_dir) + + # Check that the profile results are in the dir + result_files = [name for name in os.listdir(timing_root_dir) + if os.path.isfile(os.path.join(timing_root_dir, name))] + + # Load every text file in the results dir. + df = pd.DataFrame() + for result_name in result_files: + results_path = os.path.join(timing_root_dir, result_name) + if os.path.splitext(result_name)[1] != ".txt": + continue + timings = get_total_times(results_path) + this_df = timings.to_frame(name=os.path.splitext(result_name)[0]) + if df.empty: + df = this_df + else: + df = pd.merge(df, this_df, left_index=True, right_index=True) + + # Plot + ax = df.plot.bar(rot=0) + ax.set_ylabel('Processing Time per Second of Data (s)') + for con in ax.containers: + ax.bar_label(con, fmt='%.1f', rotation=45) + ax.tick_params(axis='x', labelrotation=45) + plt.title("Platform Comparison for 3D Match") + + # Save the plot in the root folder of the timings. + if save_fig: + image_path = os.path.join(timing_root_dir, 'timings.png') + print("Saving figure to disk as: " + image_path) + plt.savefig(image_path) + + plt.show() + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser( + description="Plot 3d match timing comparisons across platforms.") + parser.add_argument("timing_root_dir", metavar="timing_root_dir", type=str, + help="The directory containing the timing results.") + args = parser.parse_args() + plot_timings(args.timing_root_dir) diff --git a/nvblox/experiments/experiments/realistic_timings/gather_timings.py b/nvblox/experiments/experiments/realistic_timings/gather_timings.py new file mode 100644 index 00000000..cd08fe51 --- /dev/null +++ b/nvblox/experiments/experiments/realistic_timings/gather_timings.py @@ -0,0 +1,68 @@ +#!/usr/bin/python3 + +import os +import time +import argparse +import subprocess +from datetime import datetime + +import git + +import nvblox.experiments.threedmatch as threedmatch + +from plot_timings import plot_timings + +profiles = {"min": {"tsdf_frame_subsampling": 3, "color_frame_subsampling": 30, + "esdf_frame_subsampling": 10, "mesh_frame_subsampling": 30}, + "goal": {"tsdf_frame_subsampling": 1, "color_frame_subsampling": 10, + "esdf_frame_subsampling": 3, "mesh_frame_subsampling": 10}} +resolutions = [0.05, 0.10, 0.20] + + +def gather_timings(dataset_path: str) -> None: + # Params + num_runs = 1 + + # Get the repo base & all needed paths. 
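+    # (Repo discovery mirrors compare_branches.py: GitPython searches upward
+    # from the current working directory for the enclosing repository.)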
+    this_directory = os.getcwd()
+    repo = git.Repo(this_directory, search_parent_directories=True)
+    git_root_dir = repo.git.rev_parse("--show-toplevel")
+    assert not repo.bare
+    build_dir = os.path.join(git_root_dir, 'nvblox/build')
+    threedmatch_binary_path = os.path.join(
+        build_dir, 'experiments/fuse_3dmatch')
+
+    # Where to place timings
+    datetime_str = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
+    output_root = os.path.join(
+        this_directory, 'output', datetime_str)
+
+    # Time each profile
+    for profile_name, profile_val in profiles.items():
+        folder_name = profile_name
+
+        # Time each resolution within each profile
+        # TODO
+        # Benchmark
+        output_dir = os.path.join(output_root, folder_name)
+        if not os.path.exists(output_dir):
+            os.makedirs(output_dir, exist_ok=True)
+
+        threedmatch.run_multiple(
+            num_runs, threedmatch_binary_path, dataset_path, output_dir,
+            warmup_run=False, flags=profile_val)
+
+    # Plot the results
+    plot_timings(output_root)
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(
+        description="Gather 3DMatch timing results for a set of parameter profiles.")
+    parser.add_argument(
+        "dataset_base_path", metavar="dataset_base_path", type=str,
+        help="Path to the 3DMatch dataset root directory.")
+
+    args = parser.parse_args()
+    gather_timings(args.dataset_base_path) diff --git a/nvblox/experiments/experiments/realistic_timings/plot_timings.py b/nvblox/experiments/experiments/realistic_timings/plot_timings.py new file mode 100644 index 00000000..f3dcb9dc --- /dev/null +++ b/nvblox/experiments/experiments/realistic_timings/plot_timings.py @@ -0,0 +1,116 @@
+#!/usr/bin/python3
+
+from nvblox.experiments.timing import get_timings_as_dataframe
+import os
+import argparse
+import glob
+import pandas as pd
+from typing import Tuple
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib import rcParams
+rcParams.update({'figure.autolayout': True})
+
+# Get the timings for a single run - ONLY the 3dmatch timings.
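+# Timings are normalized to "processing time per second of data": the
+# 3dmatch per-frame timer's num_calls gives the number of frames, and
+# dividing by the 30 Hz sensor rate gives the dataset duration in seconds.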
+
+frame_rate = 30
+
+def get_total_times(filepath: str) -> pd.Series:
+    timings = get_timings_as_dataframe(filepath)
+    timings = timings[timings.index.str.startswith('3dmatch')]
+    total_frames = timings["num_calls"].max()
+    total_seconds = total_frames / frame_rate
+    return timings["total_time"].divide(total_seconds)
+
+# Get timings for multiple runs and average them
+
+
+def get_profile_mean_total_times(timing_dir: str) -> Tuple[pd.Series, np.ndarray]:
+    series = []
+    results_files = glob.glob(timing_dir + "/run_*.txt")
+    for f in results_files:
+        series.append(get_total_times(f))
+    total_times = pd.concat(series, axis=1)
+    total_times_mean = total_times.mean(axis=1)
+    total_times_max = total_times.max(axis=1)
+    total_times_min = total_times.min(axis=1)
+    total_times_max_min_np = np.vstack((total_times_max, total_times_min))
+    total_times_err = np.abs(total_times_max_min_np -
+                             total_times_mean.to_numpy())
+    return total_times_mean, total_times_err
+
+
+def plot_timings(timing_root_dir: str, save_fig: bool = True) -> None:
+    print("Looking for results in: " + timing_root_dir)
+
+    # Check that the profile results are in the dir
+    result_dirs = [name for name in os.listdir(timing_root_dir)
+                   if os.path.isdir(os.path.join(timing_root_dir, name))]
+    assert len(
+        result_dirs) == 2, "We expect a comparison between exactly two profiles (out of laziness)"
+
+    # Profile 1 results
+    profile_1_name = result_dirs[0]
+    profile_1_results_folder = os.path.join(timing_root_dir, profile_1_name)
+    profile_1_mean, profile_1_err = get_profile_mean_total_times(
+        profile_1_results_folder)
+
+    # Profile 2 results
+    profile_2_name = result_dirs[1]
+    profile_2_results_folder = os.path.join(timing_root_dir, profile_2_name)
+    profile_2_mean, profile_2_err = get_profile_mean_total_times(
+        profile_2_results_folder)
+
+    # Select only the intersection of the two series.
+    intersection_columns = profile_1_mean.index.intersection(
+        profile_2_mean.index)
+
+    profile_1_inds = [profile_1_mean.index.get_loc(
+        c) for c in intersection_columns]
+    profile_2_inds = [profile_2_mean.index.get_loc(
+        c) for c in intersection_columns]
+
+    profile_1_mean_filtered = profile_1_mean[intersection_columns]
+    profile_2_mean_filtered = profile_2_mean[intersection_columns]
+    profile_1_err_filtered = profile_1_err[:, profile_1_inds]
+    profile_2_err_filtered = profile_2_err[:, profile_2_inds]
+
+    assert len(profile_2_mean_filtered) == len(profile_1_mean_filtered)
+
+    # Plot
+    fig, ax = plt.subplots()
+    bar_width = 0.35
+    x = np.arange(len(profile_1_mean_filtered))
+    x_device = x - bar_width / 2.0
+    x_unified = x + bar_width / 2.0
+    bar1 = ax.bar(x_device, profile_1_mean_filtered, bar_width,
+                  yerr=profile_1_err_filtered, label=profile_1_name)
+    bar2 = ax.bar(x_unified, profile_2_mean_filtered, bar_width,
+                  yerr=profile_2_err_filtered, label=profile_2_name)
+    ax.set_xticks(x)
+    ax.set_xticklabels(intersection_columns, rotation='vertical')
+    try:
+        ax.bar_label(bar1, fmt='%.2f')
+        ax.bar_label(bar2, fmt='%.2f')
+    except AttributeError:
+        # Axes.bar_label is only available in matplotlib >= 3.4.
+        pass
+    ax.legend()
+    ax.set_ylabel('Processing Time per Second of Data (s)')
+
+    # Save the plot in the root folder of the timings.
+    if save_fig:
+        image_path = os.path.join(timing_root_dir, 'timings.png')
+        print("Saving figure to disk as: " + image_path)
+        plt.savefig(image_path)
+
+    plt.show()
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(
+        description="Plot profile timing comparisons.")
+    parser.add_argument("timing_root_dir", metavar="timing_root_dir", type=str,
+                        help="The directory containing the timing results.")
+    args = parser.parse_args()
+    plot_timings(args.timing_root_dir) diff --git a/nvblox/experiments/experiments/stream_compaction/CMakeLists.txt b/nvblox/experiments/experiments/stream_compaction/CMakeLists.txt new file mode 100644 index 00000000..b31e0eb5 --- /dev/null +++ b/nvblox/experiments/experiments/stream_compaction/CMakeLists.txt @@ -0,0 +1,11 @@
+add_executable(stream_compaction
+    main.cpp
+    lib/stream_compaction.cu
+    lib/cached_allocator.cu
+)
+target_include_directories(stream_compaction PUBLIC
+    include
+)
+target_link_libraries(stream_compaction nvblox_experiments_common)
+makeLink("${CMAKE_CURRENT_SOURCE_DIR}/run.py" "${CMAKE_CURRENT_BINARY_DIR}/" stream_compaction)
+makeLink("${CMAKE_CURRENT_SOURCE_DIR}/plot.py" "${CMAKE_CURRENT_BINARY_DIR}/" stream_compaction) diff --git a/nvblox/experiments/experiments/stream_compaction/include/nvblox/experiments/cached_allocator.h b/nvblox/experiments/experiments/stream_compaction/include/nvblox/experiments/cached_allocator.h new file mode 100644 index 00000000..83bedbe4 --- /dev/null +++ b/nvblox/experiments/experiments/stream_compaction/include/nvblox/experiments/cached_allocator.h @@ -0,0 +1,40 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <cstddef>
+
+namespace nvblox {
+namespace experiments {
+
+struct CachedAllocator {
+  typedef char value_type;
+
+  CachedAllocator() {}
+  ~CachedAllocator();
+
+  char* allocate(std::ptrdiff_t num_bytes);
+
+  void deallocate(char* ptr, size_t n);
+  void free();
+
+  std::ptrdiff_t num_bytes_allocated_ = 0;
+  char* memory_ = nullptr;
+  bool in_use_ = false;
+};
+
+}  // namespace experiments
+}  // namespace nvblox diff --git a/nvblox/experiments/experiments/stream_compaction/include/nvblox/experiments/stream_compaction.h b/nvblox/experiments/experiments/stream_compaction/include/nvblox/experiments/stream_compaction.h new file mode 100644 index 00000000..ede94c1a --- /dev/null +++ b/nvblox/experiments/experiments/stream_compaction/include/nvblox/experiments/stream_compaction.h @@ -0,0 +1,37 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "nvblox/core/unified_vector.h"
+
+#include "nvblox/experiments/cached_allocator.h"
+
+namespace nvblox {
+namespace experiments {
+
+class StreamCompactor {
+ public:
+  StreamCompactor() {}
+
+  int streamCompactionOnGPU(const device_vector<int>& data,
+                            const device_vector<bool>& stencil,
+                            device_vector<int>* compact_data_ptr,
+                            bool use_cached_allocator = true);
+
+ private:
+  CachedAllocator cached_allocator_;
+};
+
+}  // namespace experiments
+}  // namespace nvblox diff --git a/nvblox/experiments/experiments/stream_compaction/lib/cached_allocator.cu b/nvblox/experiments/experiments/stream_compaction/lib/cached_allocator.cu new file mode 100644 index 00000000..993d30d4 --- /dev/null +++ b/nvblox/experiments/experiments/stream_compaction/lib/cached_allocator.cu @@ -0,0 +1,64 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "nvblox/experiments/cached_allocator.h"
+
+#include <glog/logging.h>
+
+#include <thrust/system/cuda/memory.h>
+
+namespace nvblox {
+namespace experiments {
+
+CachedAllocator::~CachedAllocator() { free(); }
+
+char* CachedAllocator::allocate(std::ptrdiff_t num_bytes) {
+  CHECK(!in_use_) << "This allocator is only for single consumers.";
+
+  // If there's enough memory,
+  if (num_bytes <= num_bytes_allocated_) {
+    // don't allocate anything and return the block we have.
+  } else {
+    try {
+      // Free old block
+      if (memory_ != nullptr) {
+        thrust::cuda::free(thrust::cuda::pointer<char>(memory_));
+      }
+
+      // allocate memory and convert cuda::pointer to raw pointer
+      memory_ = thrust::cuda::malloc<char>(num_bytes).get();
+      num_bytes_allocated_ = num_bytes;
+    } catch (std::runtime_error& e) {
+      throw;
+    }
+  }
+
+  in_use_ = true;
+  return memory_;
+}
+
+void CachedAllocator::deallocate(char* ptr, size_t n) {
+  in_use_ = false;
+}
+
+void CachedAllocator::free() {
+  thrust::cuda::free(thrust::cuda::pointer<char>(memory_));
+  num_bytes_allocated_ = 0;
+  in_use_ = false;
+  memory_ = nullptr;
+}
+
+}  // namespace experiments
+}  // namespace nvblox diff --git a/nvblox/experiments/experiments/stream_compaction/lib/stream_compaction.cu b/nvblox/experiments/experiments/stream_compaction/lib/stream_compaction.cu new file mode 100644 index 00000000..2b46edf7 --- /dev/null +++ b/nvblox/experiments/experiments/stream_compaction/lib/stream_compaction.cu @@ -0,0 +1,53 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "nvblox/experiments/stream_compaction.h"
+
+#include <thrust/copy.h>
+
+#include "nvblox/utils/timing.h"
+
+namespace nvblox {
+namespace experiments {
+
+int StreamCompactor::streamCompactionOnGPU(const device_vector<int>& data,
+                                           const device_vector<bool>& stencil,
+                                           device_vector<int>* compact_data_ptr,
+                                           bool use_cached_allocator) {
+  int* output_end_it;
+  if (!use_cached_allocator) {
+    output_end_it = thrust::copy_if(thrust::device,             // NOLINT
+                                    data.data(),                // NOLINT
+                                    data.data() + data.size(),  // NOLINT
+                                    stencil.data(),             // NOLINT
+                                    compact_data_ptr->data(),   // NOLINT
+                                    thrust::identity<bool>());
+
+  } else {
+    output_end_it =
+        thrust::copy_if(thrust::cuda::par(cached_allocator_),  // NOLINT
+                        data.data(),                           // NOLINT
+                        data.data() + data.size(),             // NOLINT
+                        stencil.data(),                        // NOLINT
+                        compact_data_ptr->data(),              // NOLINT
+                        thrust::identity<bool>());
+  }
+
+  // The returned iterator points one past the last compacted element, so the
+  // distance is measured from the start of the *output* buffer.
+  const int num_true = output_end_it - compact_data_ptr->data();
+  return num_true;
+}
+
+}  // namespace experiments
+}  // namespace nvblox diff --git a/nvblox/experiments/experiments/stream_compaction/main.cpp b/nvblox/experiments/experiments/stream_compaction/main.cpp new file mode 100644 index 00000000..a10ab728 --- /dev/null +++ b/nvblox/experiments/experiments/stream_compaction/main.cpp @@ -0,0 +1,106 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/nvblox/experiments/experiments/stream_compaction/main.cpp b/nvblox/experiments/experiments/stream_compaction/main.cpp
new file mode 100644
index 00000000..a10ab728
--- /dev/null
+++ b/nvblox/experiments/experiments/stream_compaction/main.cpp
@@ -0,0 +1,106 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <numeric>
+#include <random>
+#include <vector>
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+
+#include "nvblox/core/cuda/warmup.h"
+#include "nvblox/core/unified_vector.h"
+#include "nvblox/utils/timing.h"
+
+#include "nvblox/experiments/stream_compaction.h"
+
+using namespace nvblox;
+
+DEFINE_int32(num_compactions, 100,
+             "How many stream compactions to run and average timings over.");
+DEFINE_int32(num_bytes, 1e7, "How many bytes in the data to compact.");
+DEFINE_string(timing_output_path, "./timings.txt",
+              "File in which to save the timing results.");
+
+int main(int argc, char* argv[]) {
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+  FLAGS_alsologtostderr = true;
+  google::InstallFailureSignalHandler();
+
+  // Params
+  const int num_trials = FLAGS_num_compactions;
+  const int num_bytes = FLAGS_num_bytes;
+
+  // Warmup
+  warmupCuda();
+
+  // Data
+  const int kNumElements = num_bytes / sizeof(int);
+  std::vector<int> data(kNumElements);
+  std::iota(data.begin(), data.end(), 0);
+  device_vector<int> data_device(data);
+  host_vector<int> data_host(kNumElements);
+  device_vector<int> data_compact_device(kNumElements);
+  host_vector<int> data_compact_host(kNumElements);
+
+  // Stencil
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<> distribution(0, 1);
+  host_vector<bool> stencil(kNumElements);
+  std::generate(stencil.begin(), stencil.end(), [&]() -> bool {
+    return static_cast<bool>(distribution(gen));
+  });
+  const device_vector<bool> stencil_device = stencil;
+  host_vector<bool> stencil_host(kNumElements);
+
+  // Experiment #1
+  // - transfer data and stencil device -> host
+  // - stream compaction on host
+  // - transfer compact stream host -> device
+  timing::Timer host_timer("host_compaction");
+  for (int trial_idx = 0; trial_idx < num_trials; trial_idx++) {
+    // Stencil + data, device -> host
+    data_host = data_device;
+    stencil_host = stencil_device;
+    // Stream compaction on host
+    data_compact_host.resize(0);
+    for (size_t i = 0; i < stencil_host.size(); i++) {
+      if (stencil_host[i]) {
+        data_compact_host.push_back(data_host[i]);
+      }
+    }
+    // Data, host -> device
+    data_compact_device = data_compact_host;
+  }
+  host_timer.Stop();
+
+  // Experiment #2
+  // - perform compaction on the device
+  timing::Timer device_timer("device_compaction");
+  experiments::StreamCompactor stream_compactor;
+  for (int trial_idx = 0; trial_idx < num_trials; trial_idx++) {
+    stream_compactor.streamCompactionOnGPU(data_device, stencil_device,
+                                           &data_compact_device);
+  }
+  device_timer.Stop();
+
+  std::cout << timing::Timing::Print() << std::endl;
+  std::ofstream timing_file(FLAGS_timing_output_path, std::ofstream::out);
+  timing_file << timing::Timing::Print();
+  timing_file.close();
+}
\ No newline at end of file
diff --git a/nvblox/experiments/experiments/stream_compaction/plot.py b/nvblox/experiments/experiments/stream_compaction/plot.py
new file mode 100644
index 00000000..3eeaf624
--- /dev/null
+++ b/nvblox/experiments/experiments/stream_compaction/plot.py
@@ -0,0 +1,72 @@
+
+from nvblox.experiments.timing import get_timings_as_dataframe
+import os
+import argparse
+import glob
+import numpy as np
+import pandas as pd
+import re
+import matplotlib.pyplot as plt
+from matplotlib import rcParams
+rcParams.update({'figure.autolayout': True})
+
+
+# Get the timings for a single run
+def get_total_times(filepath: str) -> pd.Series:
+    timings = get_timings_as_dataframe(filepath)
+    return timings["total_time"]
+
+
+def get_platform_timings(timings_dir: str, platform_name: str = None) -> pd.DataFrame:
+    results_files = glob.glob(timings_dir + "/timings_*.txt")
+    results_files.sort()
+    df = pd.DataFrame()
+    for f in results_files:
+
+        # Extract datasize
+        data_size = int(re.search(r'timings_(.+?).txt', f).group(1))
+
+        # Total times -> time/byte
+        timings = get_total_times(f)
+        timings /= data_size
+
+        # Add to timings for each datasize
+        this_df = timings.to_frame(name=str(data_size) + " bytes")
+        if df.empty:
+            df = this_df
+        else:
+            df = pd.merge(df, this_df, left_index=True, right_index=True)
+    if platform_name is not None:
+        df = df.rename(index={'host_compaction': platform_name + ' host'})
+        df = df.rename(index={'device_compaction': platform_name + ' device'})
+    print(df)
+    return df
+
+
+def plot_timings(timings_dir: str):
+
+    df = pd.DataFrame()
+    platform_timings = []
+    for platform in os.listdir(timings_dir):
+        print("collecting timings for platform: " + platform)
+
+        platform_timings.append(get_platform_timings(
+            os.path.join(timings_dir, platform), platform))
+    df = pd.concat(platform_timings)
+
+    # Plotting
+    df.plot.bar()
+    plt.yscale('log')
+    plt.ylabel('Time per byte (s)')
+    plt.xlabel('Platform/method')
+    plt.show()
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(
+        description="Plot stream compaction timing comparisons.")
+    parser.add_argument("timings_dir", metavar="timings_dir", type=str,
+                        help="The directory containing the timing results.")
+    args = parser.parse_args()
+    plot_timings(args.timings_dir)
diff --git a/nvblox/experiments/experiments/stream_compaction/run.py b/nvblox/experiments/experiments/stream_compaction/run.py
new file mode 100644
index 00000000..12a219bf
--- /dev/null
+++ b/nvblox/experiments/experiments/stream_compaction/run.py
@@ -0,0 +1,43 @@
+#!/usr/bin/python3
+
+import os
+import git
+import subprocess
+
+
+# Experiment params
+# The list of sizes over which to run the experiments
+data_sizes = [1e3, 1e4, 1e5, 1e6, 1e7]
+
+
+def run() -> None:
+
+    # Create the output path
+    # datetime_str = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
+    this_directory = os.getcwd()
+    output_root = os.path.join(this_directory, 'output', 'platform')
+    os.makedirs(output_root, exist_ok=True)
+
+    # Binary path
+    repo = git.Repo(this_directory, search_parent_directories=True)
+    git_root_dir = repo.git.rev_parse("--show-toplevel")
+    assert not repo.bare
+    build_dir = os.path.join(git_root_dir, 'nvblox/build')
+    binary_path = os.path.join(
+        build_dir, 'experiments/experiments/stream_compaction/stream_compaction')
+
+    # Running the experiment for each size
+    for data_size in data_sizes:
+
+        output_path = os.path.join(
+            output_root, "timings_" + str(int(data_size)) + ".txt")
+
+        args_string = "--timing_output_path " + output_path
+        args_string += " --num_bytes " + str(int(data_size))
+        run_string = binary_path + ' ' + args_string
+        print("Running experiment as:\n " + run_string)
+        subprocess.call(run_string, shell=True)
+
+
+if __name__ == '__main__':
+    run()
diff --git a/nvblox/experiments/experiments/texture_vs_global_memory_interpolation/CMakeLists.txt b/nvblox/experiments/experiments/texture_vs_global_memory_interpolation/CMakeLists.txt
new file mode 100644
index 00000000..89df593b
--- /dev/null
+++ b/nvblox/experiments/experiments/texture_vs_global_memory_interpolation/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_executable(texture_vs_global_memory_interpolation main.cpp)
+target_link_libraries(texture_vs_global_memory_interpolation nvblox_experiments_common)
diff --git a/nvblox/experiments/experiments/texture_vs_global_memory_interpolation/main.cpp b/nvblox/experiments/experiments/texture_vs_global_memory_interpolation/main.cpp
new file mode 100644
index 00000000..3fda8bb8
--- /dev/null
+++ b/nvblox/experiments/experiments/texture_vs_global_memory_interpolation/main.cpp
@@ -0,0 +1,57 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <iostream>
+#include <memory>
+#include <string>
+
+#include <glog/logging.h>
+
+#include "nvblox/utils/timing.h"
+
+#include "nvblox/experiments/common/fuse_3dmatch.h"
+#include "nvblox/experiments/integrators/experimental_projective_tsdf_integrators.h"
+
+DECLARE_bool(alsologtostderr);
+
+int main(int argc, char* argv[]) {
+  google::InitGoogleLogging(argv[0]);
+  FLAGS_alsologtostderr = true;
+  google::InstallFailureSignalHandler();
+
+  auto fuser =
+      nvblox::experiments::Fuse3DMatch::createFromCommandLineArgs(argc, argv);
+
+  // Run texture-based interpolation experiment
+  fuser.mapper().tsdf_integrator_ptr() = std::make_shared<
+      nvblox::experiments::ProjectiveTsdfIntegratorExperimentsTexture>();
+  int res_flag = fuser.run();
+  if (res_flag != 0) {
+    return res_flag;
+  }
+  const std::string texture_based_timing = nvblox::timing::Timing::Print();
+  nvblox::timing::Timing::Reset();
+
+  // Run global-memory-based interpolation experiment
+  fuser.mapper().tsdf_integrator_ptr() = std::make_shared<
+      nvblox::experiments::ProjectiveTsdfIntegratorExperimentsGlobal>();
+  res_flag = fuser.run();
+  const std::string global_based_timing = nvblox::timing::Timing::Print();
+
+  // Printing the results
+  std::cout << "\n\n\nTexture-based interpolation timings" << std::endl;
+  std::cout << "-----------" << std::endl;
+  std::cout << texture_based_timing << std::endl;
+  std::cout << "\n\n\nGlobal-based interpolation timings" << std::endl;
+  std::cout << "-----------" << std::endl;
+  std::cout << global_based_timing << std::endl;
+
+  return res_flag;
+}
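Not part of the patch: a minimal sketch of the two interpolation paths the experiment above compares. The kernel names here are hypothetical; the real implementations live in the experimental integrators further down. The texture path gets bilinear filtering from the texture unit in a single fetch (filter mode cudaFilterModeLinear); the global-memory path issues four loads and blends in software:

    __global__ void interpolateTexture(cudaTextureObject_t depth_tex, float u,
                                       float v, float* out) {
      // Hardware bilinear filtering: one fetch does the whole job.
      *out = tex2D<float>(depth_tex, u + 0.5f, v + 0.5f);
    }

    __global__ void interpolateGlobal(const float* depth, int cols, float u,
                                      float v, float* out) {
      // Software bilinear filtering from global memory: four loads + blend.
      const int x0 = static_cast<int>(u);
      const int y0 = static_cast<int>(v);
      const float ax = u - x0;
      const float ay = v - y0;
      const float v00 = depth[y0 * cols + x0];
      const float v01 = depth[y0 * cols + x0 + 1];
      const float v10 = depth[(y0 + 1) * cols + x0];
      const float v11 = depth[(y0 + 1) * cols + x0 + 1];
      *out = (1.f - ay) * ((1.f - ax) * v00 + ax * v01) +
             ay * ((1.f - ax) * v10 + ax * v11);
    }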
diff --git a/nvblox/experiments/experiments/threaded_image_loading/CMakeLists.txt b/nvblox/experiments/experiments/threaded_image_loading/CMakeLists.txt
new file mode 100644
index 00000000..8db16e0c
--- /dev/null
+++ b/nvblox/experiments/experiments/threaded_image_loading/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_executable(threaded_image_loading main.cpp)
+target_link_libraries(threaded_image_loading nvblox_experiments_common)
+makeLink("${CMAKE_CURRENT_SOURCE_DIR}/run.py" "${CMAKE_CURRENT_BINARY_DIR}/" threaded_image_loading)
+makeLink("${CMAKE_CURRENT_SOURCE_DIR}/plot_comparison.py" "${CMAKE_CURRENT_BINARY_DIR}/" threaded_image_loading)
diff --git a/nvblox/experiments/experiments/threaded_image_loading/__init__.py b/nvblox/experiments/experiments/threaded_image_loading/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/nvblox/experiments/experiments/threaded_image_loading/main.cpp b/nvblox/experiments/experiments/threaded_image_loading/main.cpp
new file mode 100644
index 00000000..ecc2e89d
--- /dev/null
+++ b/nvblox/experiments/experiments/threaded_image_loading/main.cpp
@@ -0,0 +1,89 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <string>
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+
+#include "nvblox/core/blox.h"
+#include "nvblox/core/common_names.h"
+#include "nvblox/core/cuda/warmup.h"
+#include "nvblox/core/layer.h"
+#include "nvblox/datasets/parse_3dmatch.h"
+
+#include "nvblox/experiments/common/fuse_3dmatch.h"
+
+DEFINE_bool(single_thread, false, "Load images using a single thread.");
+DEFINE_bool(multi_thread, false, "Load images using multiple threads.");
+DEFINE_int32(num_threads, -1,
+             "Number of threads to load images with in multithreaded mode.");
+DEFINE_int32(num_images, -1, "Number of images to process in this experiment.");
+DEFINE_string(timing_output_path, "",
+              "File in which to save the timing results.");
+
+DECLARE_bool(alsologtostderr);
+
+using namespace nvblox;
+
+int main(int argc, char* argv[]) {
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+  FLAGS_alsologtostderr = true;
+  google::InstallFailureSignalHandler();
+
+  std::string base_path;
+  if (argc < 2) {
+    LOG(FATAL) << "Must pass the path to the 3DMatch dataset root directory.";
+    return 0;
+  } else {
+    LOG(INFO) << "Loading data from: " << argv[1];
+    base_path = argv[1];
+  }
+
+  CHECK(FLAGS_single_thread || FLAGS_multi_thread)
+      << "Must select either single or multi-threaded.";
+  if (FLAGS_multi_thread) {
+    CHECK_GT(FLAGS_num_threads, 0)
+        << "Please specify the number of threads to run with.";
+  }
+
+  std::string esdf_output_path = "./3dmatch_esdf_pointcloud.ply";
+  std::string mesh_output_path = "./3dmatch_mesh.ply";
+  experiments::Fuse3DMatch fuser(base_path, FLAGS_timing_output_path,
+                                 mesh_output_path, esdf_output_path);
+  if (FLAGS_num_images > 0) {
+    fuser.num_frames_to_integrate_ = FLAGS_num_images;
+  }
+
+  // Replacing the image loader with the one we are asked to test.
+ if (FLAGS_single_thread) { + fuser.depth_image_loader_ = datasets::threedmatch::createDepthImageLoader( + base_path, fuser.sequence_num_); + fuser.color_image_loader_ = datasets::threedmatch::createColorImageLoader( + base_path, fuser.sequence_num_); + } else { + fuser.depth_image_loader_ = + datasets::threedmatch::createMultithreadedDepthImageLoader( + base_path, fuser.sequence_num_, FLAGS_num_threads); + fuser.color_image_loader_ = + datasets::threedmatch::createMultithreadedColorImageLoader( + base_path, fuser.sequence_num_, FLAGS_num_threads); + } + + warmupCuda(); + fuser.run(); + + return 0; +} diff --git a/nvblox/experiments/experiments/threaded_image_loading/plot_comparison.py b/nvblox/experiments/experiments/threaded_image_loading/plot_comparison.py new file mode 100644 index 00000000..8b632d83 --- /dev/null +++ b/nvblox/experiments/experiments/threaded_image_loading/plot_comparison.py @@ -0,0 +1,51 @@ +from nvblox.experiments.timing import get_timings_as_dataframe +import os +import argparse +import glob + +import numpy as np +import matplotlib.pyplot as plt +from matplotlib import rcParams +rcParams.update({'figure.autolayout': True}) + + +def plot_timings(timing_root_dir: str) -> None: + + print("Loading result files at: " + timing_root_dir) + files = glob.glob(os.path.join(timing_root_dir, '*timings.txt')) + files.sort() + files.insert(0, files.pop()) + + total_times = [] + labels = [] + for filepath in files: + print("Loading a file at: " + filepath) + timing_dataframe = get_timings_as_dataframe(filepath) + total_time_series = timing_dataframe['total_time'] + total_time = total_time_series.at['3dmatch/file_loading'] + total_times.append(total_time) + labels.append(filepath.split('/')[-1].split('.')[0]) + + # Plot + fig, ax = plt.subplots() + x = np.arange(len(total_times)) + ax.bar(x, total_times) + ax.set_ylabel("Load time (s)") + ax.set_xlabel("Test") + ax.set_xticks(x) + ax.set_xticklabels(labels, rotation='vertical') + + fig_path = os.path.join(timing_root_dir, 'timings.png') + plt.savefig(fig_path) + + plt.show() + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser( + description="Plot branch timing comparisons.") + parser.add_argument("timing_root_dir", metavar="timing_root_dir", type=str, + help="The directory containing the timing results.") + args = parser.parse_args() + plot_timings(args.timing_root_dir) diff --git a/nvblox/experiments/experiments/threaded_image_loading/run.py b/nvblox/experiments/experiments/threaded_image_loading/run.py new file mode 100644 index 00000000..96340de0 --- /dev/null +++ b/nvblox/experiments/experiments/threaded_image_loading/run.py @@ -0,0 +1,68 @@ +import os +import subprocess +import argparse +from enum import Enum + +from plot_comparison import plot_timings + + +class LoadType(Enum): + kSingle = 1 + kMulti = 2 + + +def run_single(dataset_base_path: str, timing_output_path: str, load_type: LoadType, num_threads: int = -1, num_images: int = -1): + # Launch the 3DMatch reconstruction + experiment_executable_name = 'threaded_image_loading' + if load_type == LoadType.kSingle: + memory_flag = '--single_thread' + else: + memory_flag = '--multi_thread' + args_string = dataset_base_path + args_string += ' ' + memory_flag + args_string += ' --timing_output_path ' + timing_output_path + if load_type == LoadType.kMulti: + args_string += ' --num_threads ' + str(num_threads) + print(f"num_images: {num_images}") + if num_images > 0: + args_string += ' --num_images ' + str(num_images) + print("args_string: " + args_string) + 
subprocess.call('./' + experiment_executable_name +
+                    ' ' + args_string, shell=True)
+
+
+def run_experiment(dataset_base_path: str, number_of_images: int):
+
+    print(f"number_of_images: {number_of_images}")
+
+    # Create the timings output directory
+    this_directory = os.getcwd()
+    output_dir = os.path.join(this_directory, 'output')
+    if not os.path.isdir(output_dir):
+        os.mkdir(output_dir)
+
+    output_path = os.path.join(output_dir, "single_threaded_timings.txt")
+    run_single(dataset_base_path, output_path,
+               LoadType.kSingle, num_images=number_of_images)
+
+    thread_nums = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
+    for num_threads in thread_nums:
+        output_path = os.path.join(
+            output_dir, f"multi_threaded_{num_threads:02}_timings.txt")
+        run_single(dataset_base_path, output_path,
+                   LoadType.kMulti, num_threads=num_threads,
+                   num_images=number_of_images)
+
+    plot_timings(output_dir)
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(description="Run an experiment.")
+    parser.add_argument("dataset_base_path", metavar="dataset_base_path", type=str,
+                        help="Path to the 3DMatch dataset root directory.")
+    parser.add_argument("--number_of_images", metavar="number_of_images", type=int, default=200,
+                        help="Number of images to load.")
+    args = parser.parse_args()
+    if args.dataset_base_path:
+        run_experiment(args.dataset_base_path,
+                       args.number_of_images)
diff --git a/nvblox/experiments/experiments/unified_vs_device_memory/CMakeLists.txt b/nvblox/experiments/experiments/unified_vs_device_memory/CMakeLists.txt
new file mode 100644
index 00000000..95b03945
--- /dev/null
+++ b/nvblox/experiments/experiments/unified_vs_device_memory/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_executable(unified_vs_device_memory main.cpp)
+target_link_libraries(unified_vs_device_memory nvblox_experiments_common)
+makeLink("${CMAKE_CURRENT_SOURCE_DIR}/run.py" "${CMAKE_CURRENT_BINARY_DIR}/" unified_vs_device_memory)
+makeLink("${CMAKE_CURRENT_SOURCE_DIR}/analyze_results.py" "${CMAKE_CURRENT_BINARY_DIR}/" unified_vs_device_memory)
diff --git a/nvblox/experiments/experiments/unified_vs_device_memory/analyze_results.py b/nvblox/experiments/experiments/unified_vs_device_memory/analyze_results.py
new file mode 100644
index 00000000..6bbcee11
--- /dev/null
+++ b/nvblox/experiments/experiments/unified_vs_device_memory/analyze_results.py
@@ -0,0 +1,55 @@
+import glob
+
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from matplotlib import rcParams
+rcParams.update({'figure.autolayout': True})
+
+from nvblox.experiments.timing import get_timings_as_dataframe
+
+
+def get_relevant_timings(filepath: str):
+    timings = get_timings_as_dataframe(filepath)
+    timings = timings.drop(labels="3dmatch/time_per_frame")
+    timings = timings.drop(labels="3dmatch/file_loading")
+    return timings["total_time"]
+
+
+def main():
+    # Load the data
+    input_root = "./output"
+
+    # Averaging the experiment runs
+    series = []
+    device_timing_files = glob.glob(input_root + "/device_*.txt")
+    for f in device_timing_files:
+        series.append(get_relevant_timings(f))
+    device_total_times = pd.concat(series, axis=1)
+    device_total_times_mean = device_total_times.mean(axis=1)
+    device_total_times_std = device_total_times.std(axis=1)
+
+    series = []
+    unified_timing_files = glob.glob(input_root + "/unified_*.txt")
+    for f in unified_timing_files:
+        series.append(get_relevant_timings(f))
+    unified_total_times = pd.concat(series, axis=1)
+    unified_total_times_mean = unified_total_times.mean(axis=1)
+    unified_total_times_std = unified_total_times.std(axis=1)
+
+    # Plot
+    fig, ax = plt.subplots()
+    bar_width = 0.35
+    x = np.arange(len(device_total_times_mean))
+    x_device = x - bar_width / 2.0
+    x_unified = x + bar_width / 2.0
+    ax.bar(x_device, device_total_times_mean, bar_width,
+           yerr=device_total_times_std, label='device')
+    ax.bar(x_unified, unified_total_times_mean, bar_width,
+           yerr=unified_total_times_std, label='unified')
+    ax.set_xticks(x)
+    ax.set_xticklabels(device_total_times_mean.index, rotation='vertical')
+    ax.legend()
+    plt.show()
+
+
+if __name__ == '__main__':
+    main()
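The experiment defined next compares device-only against unified (managed) memory. For context, not from the patch: the two allocation kinds differ only in how the buffer is allocated and who can touch it, as sketched here (kBytes is an assumed illustrative size):

    #include <cuda_runtime.h>

    void allocateBothKinds() {
      constexpr size_t kBytes = 1 << 20;  // Assumed size, for illustration.
      float* device_only = nullptr;
      cudaMalloc(&device_only, kBytes);  // GPU-only; host access needs cudaMemcpy.
      float* unified = nullptr;
      cudaMallocManaged(&unified, kBytes);  // One pointer; pages migrate on demand.
      unified[0] = 1.0f;  // Host writes directly; the driver migrates the page.
      cudaFree(device_only);
      cudaFree(unified);
    }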
diff --git a/nvblox/experiments/experiments/unified_vs_device_memory/main.cpp b/nvblox/experiments/experiments/unified_vs_device_memory/main.cpp
new file mode 100644
index 00000000..e8aed8c5
--- /dev/null
+++ b/nvblox/experiments/experiments/unified_vs_device_memory/main.cpp
@@ -0,0 +1,80 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <iostream>
+#include <string>
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+
+#include "nvblox/core/blox.h"
+#include "nvblox/core/common_names.h"
+#include "nvblox/core/cuda/warmup.h"
+#include "nvblox/core/layer.h"
+
+#include "nvblox/experiments/common/fuse_3dmatch.h"
+
+DEFINE_bool(unified_memory, false, "Run the test using unified memory.");
+DEFINE_bool(device_memory, false, "Run the test using device memory.");
+DEFINE_string(timing_output_path, "./3dmatch_timings.txt",
+              "File in which to save the timing results.");
+
+DECLARE_bool(alsologtostderr);
+
+using namespace nvblox;
+
+int main(int argc, char* argv[]) {
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+  FLAGS_alsologtostderr = true;
+  google::InstallFailureSignalHandler();
+
+  if ((!FLAGS_unified_memory && !FLAGS_device_memory) ||
+      (FLAGS_unified_memory && FLAGS_device_memory)) {
+    std::cout << "Must select either --unified_memory or --device_memory\n";
+    return 0;
+  }
+
+  std::string dataset_base_path;
+  if (argc < 2) {
+    // Try out running on the test datasets.
+    dataset_base_path = "../tests/data/3dmatch";
+    std::cout << "No 3DMatch path specified; defaulting to: "
+              << dataset_base_path << "\n";
+  } else {
+    dataset_base_path = argv[1];
+    std::cout << "Loading 3DMatch files from " << dataset_base_path << ".\n";
+  }
+
+  std::string esdf_output_path = "./3dmatch_esdf_pointcloud.ply";
+  std::string mesh_output_path = "./3dmatch_mesh.ply";
+  experiments::Fuse3DMatch fuser(dataset_base_path, FLAGS_timing_output_path,
+                                 mesh_output_path, esdf_output_path);
+
+  // Replacing the TSDF and color layers with layers stored in the appropriate
+  // memory type.
+  if (FLAGS_device_memory) {
+    fuser.mapper().layers() =
+        LayerCake::create<TsdfLayer, ColorLayer, EsdfLayer, MeshLayer>(
+            fuser.voxel_size_m_, MemoryType::kDevice);
+  } else {
+    fuser.mapper().layers() =
+        LayerCake::create<TsdfLayer, ColorLayer, EsdfLayer, MeshLayer>(
+            fuser.voxel_size_m_, MemoryType::kUnified, MemoryType::kUnified,
+            MemoryType::kDevice, MemoryType::kDevice);
+  }
+
+  warmupCuda();
+  fuser.run();
+
+  return 0;
+}
\ No newline at end of file
diff --git a/nvblox/experiments/experiments/unified_vs_device_memory/run.py b/nvblox/experiments/experiments/unified_vs_device_memory/run.py
new file mode 100755
index 00000000..3e928894
--- /dev/null
+++ b/nvblox/experiments/experiments/unified_vs_device_memory/run.py
@@ -0,0 +1,56 @@
+import os
+import subprocess
+import argparse
+from enum import Enum
+
+
+class MemoryType(Enum):
+    kDevice = 1
+    kUnified = 2
+
+
+def run_single(dataset_base_path: str, timing_output_path: str, memory_type: MemoryType):
+    # Launch the 3DMatch reconstruction
+    experiment_executable_name = 'unified_vs_device_memory'
+    if memory_type == MemoryType.kUnified:
+        memory_flag = '--unified_memory'
+    else:
+        memory_flag = '--device_memory'
+    args_string = memory_flag + " --timing_output_path " + \
+        timing_output_path + ' ' + dataset_base_path
+    print("args_string: " + args_string)
+    subprocess.call('./' + experiment_executable_name +
+                    ' ' + args_string, shell=True)
+
+
+def run_experiment(dataset_base_path: str, number_of_runs: int, warm_up_run: bool):
+
+    # Create the timings output directory
+    if not os.path.isdir("output"):
+        os.mkdir("output")
+
+    # Run a bunch of times
+    run_indices = list(range(number_of_runs))
+    if warm_up_run:
+        run_indices.insert(0, 0)
+    for run_idx in run_indices:
+        print("Run: " + str(run_idx) + ", Memory type: kDevice")
+        timing_output_path = "./output/device_" + str(run_idx) + ".txt"
+        run_single(dataset_base_path, timing_output_path, MemoryType.kDevice)
+        print("Run: " + str(run_idx) + ", Memory type: kUnified")
+        timing_output_path = "./output/unified_" + str(run_idx) + ".txt"
+        run_single(dataset_base_path, timing_output_path, MemoryType.kUnified)
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(description="Run an experiment.")
+    parser.add_argument("dataset_base_path", metavar="dataset_base_path", type=str,
+                        help="Path to the 3DMatch dataset root directory.")
+    parser.add_argument("--number_of_runs", metavar="number_of_runs", type=int, default=10,
+                        help="Number of runs to do in this experiment.")
+    args = parser.parse_args()
+    warm_up_run = True
+    if args.dataset_base_path:
+        run_experiment(args.dataset_base_path,
+                       args.number_of_runs, warm_up_run)
diff --git a/nvblox/experiments/experiments/vector_copies/CMakeLists.txt b/nvblox/experiments/experiments/vector_copies/CMakeLists.txt
new file mode 100644
index 00000000..49e5ef46
--- /dev/null
+++ b/nvblox/experiments/experiments/vector_copies/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_library(vector_copies_cuda vector_copies.cu)
+target_include_directories(vector_copies_cuda PUBLIC "../../../tests/include")
"../../../tests/include") +target_link_libraries(vector_copies_cuda nvblox_experiments_common nvblox_test_utils) + +add_executable(vector_copies main.cpp) +target_link_libraries(vector_copies vector_copies_cuda nvblox_experiments_common) + +makeLink("${CMAKE_CURRENT_SOURCE_DIR}/plot_timings.py" "${CMAKE_CURRENT_BINARY_DIR}/" vector_copies) diff --git a/nvblox/experiments/experiments/vector_copies/main.cpp b/nvblox/experiments/experiments/vector_copies/main.cpp new file mode 100644 index 00000000..e0f11f7c --- /dev/null +++ b/nvblox/experiments/experiments/vector_copies/main.cpp @@ -0,0 +1,55 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include +#include + +#include +#include + +#include "nvblox/core/cuda/warmup.h" +#include "nvblox/utils/timing.h" + +#include "vector_copies.h" + +DECLARE_bool(alsologtostderr); + +using namespace nvblox; + +int main(int argc, char* argv[]) { + gflags::ParseCommandLineFlags(&argc, &argv, true); + FLAGS_alsologtostderr = true; + google::InstallFailureSignalHandler(); + + warmupCuda(); + // Do a warmup run and then reset the times. + runVectorCopyExperiments(1); + nvblox::timing::Timing::Reset(); + + runVectorCopyExperiments(1000); + + std::cout << nvblox::timing::Timing::Print(); + + // Optionally output to file as well. + if (argc >= 2) { + std::ofstream outfile(argv[1]); + if (outfile.is_open()) { + outfile << nvblox::timing::Timing::Print(); + outfile.close(); + } + } + + return 0; +} \ No newline at end of file diff --git a/nvblox/experiments/experiments/vector_copies/plot_timings.py b/nvblox/experiments/experiments/vector_copies/plot_timings.py new file mode 100644 index 00000000..6f0297b2 --- /dev/null +++ b/nvblox/experiments/experiments/vector_copies/plot_timings.py @@ -0,0 +1,75 @@ +#!/usr/bin/python3 + +from nvblox.experiments.timing import get_timings_as_dataframe +import os +import argparse +import glob +import pandas as pd +from typing import Tuple +import numpy as np +import matplotlib.pyplot as plt +from matplotlib import rcParams +rcParams.update({'figure.autolayout': True}) + + +def get_total_times(filepath: str) -> pd.Series: + timings = get_timings_as_dataframe(filepath) + return timings["total_time"] + + +def plot_timings(timing_root_dir: str, save_fig: bool = True) -> None: + print("Looking for results in: " + timing_root_dir) + + # Check that the profile results are in the dir + result_files = [name for name in os.listdir(timing_root_dir) + if os.path.isfile(os.path.join(timing_root_dir, name))] + + # Load every text file in the results dir. 
diff --git a/nvblox/experiments/experiments/vector_copies/plot_timings.py b/nvblox/experiments/experiments/vector_copies/plot_timings.py
new file mode 100644
index 00000000..6f0297b2
--- /dev/null
+++ b/nvblox/experiments/experiments/vector_copies/plot_timings.py
@@ -0,0 +1,75 @@
+#!/usr/bin/python3
+
+from nvblox.experiments.timing import get_timings_as_dataframe
+import os
+import argparse
+import glob
+import pandas as pd
+from typing import Tuple
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib import rcParams
+rcParams.update({'figure.autolayout': True})
+
+
+def get_total_times(filepath: str) -> pd.Series:
+    timings = get_timings_as_dataframe(filepath)
+    return timings["total_time"]
+
+
+def plot_timings(timing_root_dir: str, save_fig: bool = True) -> None:
+    print("Looking for results in: " + timing_root_dir)
+
+    # Check that the profile results are in the dir
+    result_files = [name for name in os.listdir(timing_root_dir)
+                    if os.path.isfile(os.path.join(timing_root_dir, name))]
+
+    # Load every text file in the results dir.
+    df = pd.DataFrame()
+    for result_name in result_files:
+        results_path = os.path.join(timing_root_dir, result_name)
+        if os.path.splitext(result_name)[1] != ".txt":
+            continue
+        timings = get_total_times(results_path)
+        this_df = timings.to_frame(name=os.path.splitext(result_name)[0])
+        if df.empty:
+            df = this_df
+        else:
+            df = pd.merge(df, this_df, left_index=True, right_index=True)
+
+    print(df)
+
+    print(df.loc["exp/initialize"])
+
+    # Organize meaningful columns
+    row_names = ["host to device", "device to device",
+                 "device to host", "kernel"]
+    df.loc[row_names[0]] = df.loc["exp2a/cuda/host_to_device"]
+    df.loc[row_names[1]] = df.loc["exp2b/cuda/device_to_device"]
+    df.loc[row_names[2]] = df.loc["exp2c/cuda/device_to_host"]
+    df.loc[row_names[3]] = df.loc["exp2a/kernel"]
+
+    # Plot
+    ax = df.loc[row_names].plot.bar(rot=0)
+    ax.set_ylabel('Total Time (s)')
+    for con in ax.containers:
+        ax.bar_label(con, fmt='%.2f')
+    plt.title("Vector Copy Times Across Platforms")
+
+    # Save the plot in the root folder of the timings.
+    if save_fig:
+        image_path = os.path.join(timing_root_dir, 'timings.png')
+        print("Saving figure to disk as: " + image_path)
+        plt.savefig(image_path)
+
+    plt.show()
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(
+        description="Plot vector copy timing comparisons.")
+    parser.add_argument("timing_root_dir", metavar="timing_root_dir", type=str,
+                        help="The directory containing the timing results.")
+    args = parser.parse_args()
+    plot_timings(args.timing_root_dir)
diff --git a/nvblox/experiments/experiments/vector_copies/vector_copies.cu b/nvblox/experiments/experiments/vector_copies/vector_copies.cu
new file mode 100644
index 00000000..060c48f7
--- /dev/null
+++ b/nvblox/experiments/experiments/vector_copies/vector_copies.cu
@@ -0,0 +1,155 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <cstring>
+#include <vector>
+
+#include <thrust/copy.h>
+#include <thrust/device_vector.h>
+
+#include "nvblox/core/unified_vector.h"
+#include "nvblox/tests/increment_on_gpu.h"
+#include "nvblox/utils/timing.h"
+
+#include "vector_copies.h"
+
+using namespace nvblox;
+
+void runVectorCopyExperiments(int num_runs) {
+  constexpr size_t kVectorSize = 1000000;
+
+  for (int i = 0; i < num_runs; i++) {
+    // Experiment 1a: copy vector host-to-device with thrust.
+    timing::Timer exp_initialize_timer("exp/initialize");
+    std::vector<int> start_vec(kVectorSize, 5);
+    exp_initialize_timer.Stop();
+
+    {
+      timing::Timer exp1a_timer("exp1a/thrust/host_to_device");
+      thrust::device_vector<int> vec_1a(kVectorSize);
+      thrust::copy(start_vec.begin(), start_vec.end(), vec_1a.begin());
+      exp1a_timer.Stop();
+
+      timing::Timer exp1a_kernel_timer("exp1a/kernel");
+      test_utils::incrementOnGPU(kVectorSize,
+                                 thrust::raw_pointer_cast(vec_1a.data()));
+      exp1a_kernel_timer.Stop();
+
+      // Experiment 1b: copy vector device-to-device with thrust.
+      timing::Timer exp1b_timer("exp1b/thrust/device_to_device");
+      thrust::device_vector<int> vec_1b(kVectorSize);
+      thrust::copy(vec_1a.begin(), vec_1a.end(), vec_1b.begin());
+      exp1b_timer.Stop();
+
+      timing::Timer exp1b_kernel_timer("exp1b/kernel");
+      test_utils::incrementOnGPU(kVectorSize,
+                                 thrust::raw_pointer_cast(vec_1b.data()));
+      exp1b_kernel_timer.Stop();
+
+      // Experiment 1c: copy vector device-to-host with thrust.
+      timing::Timer exp1c_timer("exp1c/thrust/device_to_host");
+      std::vector<int> vec_1c(kVectorSize);
+      thrust::copy(vec_1a.begin(), vec_1a.end(), vec_1c.begin());
+      exp1c_timer.Stop();
+    }
+
+    {
+      // Experiment 2a: copy vector host-to-device with cuda memcpy.
+      timing::Timer exp2a_timer("exp2a/cuda/host_to_device");
+      thrust::device_vector<int> vec_2a(kVectorSize);
+      cudaMemcpy(thrust::raw_pointer_cast(vec_2a.data()), start_vec.data(),
+                 kVectorSize * sizeof(int), cudaMemcpyHostToDevice);
+      exp2a_timer.Stop();
+
+      timing::Timer exp2a_kernel_timer("exp2a/kernel");
+      test_utils::incrementOnGPU(kVectorSize,
+                                 thrust::raw_pointer_cast(vec_2a.data()));
+      exp2a_kernel_timer.Stop();
+
+      // Experiment 2b: copy vector device-to-device with cuda memcpy.
+      timing::Timer exp2b_timer("exp2b/cuda/device_to_device");
+      thrust::device_vector<int> vec_2b(kVectorSize);
+      cudaMemcpy(thrust::raw_pointer_cast(vec_2b.data()),
+                 thrust::raw_pointer_cast(vec_2a.data()),
+                 kVectorSize * sizeof(int), cudaMemcpyDeviceToDevice);
+      exp2b_timer.Stop();
+
+      timing::Timer exp2b_kernel_timer("exp2b/kernel");
+      test_utils::incrementOnGPU(kVectorSize,
+                                 thrust::raw_pointer_cast(vec_2b.data()));
+      exp2b_kernel_timer.Stop();
+
+      // Experiment 2c: copy vector device-to-host with cuda memcpy.
+      timing::Timer exp2c_timer("exp2c/cuda/device_to_host");
+      std::vector<int> vec_2c(kVectorSize);
+      cudaMemcpy(vec_2c.data(), thrust::raw_pointer_cast(vec_2b.data()),
+                 kVectorSize * sizeof(int), cudaMemcpyDeviceToHost);
+      exp2c_timer.Stop();
+    }
+
+    {
+      // Experiment 3: create a unified vector and use it in a kernel.
+      timing::Timer exp3_initialize_timer("exp3/unified/initialize");
+      unified_vector<int> unified_vec(kVectorSize, 5);
+      exp3_initialize_timer.Stop();
+
+      timing::Timer exp3_kernel_timer("exp3/unified/kernel");
+      test_utils::incrementOnGPU(kVectorSize, unified_vec.data());
+      exp3_kernel_timer.Stop();
+
+      timing::Timer exp3_increment_timer("exp3/unified/cpu_increment");
+      for (size_t i = 0; i < kVectorSize; i++) {
+        unified_vec[i]++;
+      }
+      exp3_increment_timer.Stop();
+
+      timing::Timer exp3_host_increment_timer("exp3/host/cpu_increment");
+      for (size_t i = 0; i < kVectorSize; i++) {
+        start_vec[i]++;
+      }
+      exp3_host_increment_timer.Stop();
+    }
+    // Experiment 4: is it faster to create an array on host, copy its value to
+    // GPU, delete it or use unified memory?
+    // 4a: baseline: device copy.
+    {
+      timing::Timer exp4a("exp4a/device_mem");
+      std::vector<int> exp4a_vec(kVectorSize, 1);
+      int* exp4a_dev;
+      cudaMalloc(&exp4a_dev, kVectorSize * sizeof(int));
+      cudaMemcpy(exp4a_dev, exp4a_vec.data(), kVectorSize * sizeof(int),
+                 cudaMemcpyHostToDevice);
+      test_utils::incrementOnGPU(kVectorSize, exp4a_dev);
+      cudaFree(exp4a_dev);
+      exp4a.Stop();
+    }
+
+    // 4b: unified without anything special, using memset to initialize.
+    {
+      timing::Timer exp4b("exp4b/unified_mem");
+      unified_vector<int> unified_vec(kVectorSize);
+      memset(unified_vec.data(), 1, kVectorSize * sizeof(int));
+      test_utils::incrementOnGPU(kVectorSize, unified_vec.data());
+      exp4b.Stop();
+    }
+
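+    // Note that memset works on bytes: the call above writes 0x01 into every
+    // byte, so each int becomes 0x01010101 (16843009) rather than 1. The
+    // timing is unaffected, but a value-identical initialization would be:
+    //   for (size_t j = 0; j < kVectorSize; j++) { unified_vec[j] = 1; }
+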
+    // 4c: unified vector using the unified vector code (on CPU) to initialize.
+    {
+      timing::Timer exp4c("exp4c/unified_cpu_set");
+      unified_vector<int> unified_vec(kVectorSize, 1);
+      // unified_vec.toGPU();
+      test_utils::incrementOnGPU(kVectorSize, unified_vec.data());
+      exp4c.Stop();
+    }
+  }
+}
\ No newline at end of file
diff --git a/nvblox/experiments/experiments/vector_copies/vector_copies.h b/nvblox/experiments/experiments/vector_copies/vector_copies.h
new file mode 100644
index 00000000..dc425b37
--- /dev/null
+++ b/nvblox/experiments/experiments/vector_copies/vector_copies.h
@@ -0,0 +1,18 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+void runVectorCopyExperiments(int num_runs);
\ No newline at end of file
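Not from the patch: the property that experiments 3 and 4b/4c lean on is that `unified_vector` memory is addressable from both host and device, so the same pointer feeds CPU loops and kernels with no explicit transfer. A minimal sketch using only names that appear in the experiment above:

    #include "nvblox/core/unified_vector.h"
    #include "nvblox/tests/increment_on_gpu.h"

    using namespace nvblox;

    void hostAndDeviceTouchTheSameMemory() {
      unified_vector<int> vec(16, 0);
      vec[0] = 41;  // Host write through operator[].
      // The same pointer is valid in kernels; no cudaMemcpy required.
      test_utils::incrementOnGPU(vec.size(), vec.data());
      // After the kernel completes, vec[0] == 42 on the host.
    }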
diff --git a/nvblox/experiments/include/nvblox/experiments/common/fuse_3dmatch.h b/nvblox/experiments/include/nvblox/experiments/common/fuse_3dmatch.h
new file mode 100644
index 00000000..a5b313be
--- /dev/null
+++ b/nvblox/experiments/include/nvblox/experiments/common/fuse_3dmatch.h
@@ -0,0 +1,113 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include <limits>
+
+#include "nvblox/core/blox.h"
+#include "nvblox/core/layer.h"
+#include "nvblox/core/voxels.h"
+#include "nvblox/core/layer_cake.h"
+#include "nvblox/core/mapper.h"
+#include "nvblox/datasets/image_loader.h"
+#include "nvblox/gpu_hash/gpu_layer_view.h"
+#include "nvblox/integrators/esdf_integrator.h"
+#include "nvblox/integrators/projective_color_integrator.h"
+#include "nvblox/integrators/projective_tsdf_integrator.h"
+#include "nvblox/mesh/mesh_block.h"
+#include "nvblox/mesh/mesh_integrator.h"
+#include "nvblox/ray_tracing/sphere_tracer.h"
+
+namespace nvblox {
+namespace experiments {
+
+class Fuse3DMatch {
+ public:
+  Fuse3DMatch() = default;
+  Fuse3DMatch(const std::string& base_path,
+              const std::string& timing_output_path = std::string(),
+              const std::string& mesh_output_path = std::string(),
+              const std::string& esdf_output_path = std::string());
+
+  // Runs an experiment
+  int run();
+
+  // Set the base path pointing to the dataset.
+  void setBasePath(const std::string& base_path);
+  // Set the sequence number within the 3DMatch dataset.
+  void setSequenceNumber(int sequence_num);
+
+  // Set various settings.
+  void setVoxelSize(float voxel_size);
+  void setTsdfFrameSubsampling(int subsample);
+  void setColorFrameSubsampling(int subsample);
+  void setMeshFrameSubsampling(int subsample);
+  void setEsdfFrameSubsampling(int subsample);
+
+  // Integrate frames.
+  bool integrateFrame(const int frame_number);
+  bool integrateFrames();
+
+  // Initialize the image loaders based on the current base_path and
+  // sequence_num.
+  void initializeImageLoaders(bool multithreaded = false);
+
+  // Output a pointcloud ESDF as PLY file.
+  bool outputPointcloudPly();
+  // Output a file with the mesh.
+  bool outputMeshPly();
+  // Output timings to a file.
+  bool outputTimingsToFile();
+
+  // Factory: From command line args.
+  static Fuse3DMatch createFromCommandLineArgs(int argc, char* argv[]);
+
+  // Get the mapper (useful for experiments where we modify mapper settings).
+  RgbdMapper& mapper() { return mapper_; }
+
+  // Dataset settings.
+  std::string base_path_;
+  int sequence_num_ = 1;
+  int num_frames_to_integrate_ = std::numeric_limits<int>::max();
+  std::unique_ptr<datasets::ImageLoader<DepthImage>> depth_image_loader_;
+  std::unique_ptr<datasets::ImageLoader<ColorImage>> color_image_loader_;
+
+  // Params
+  float voxel_size_m_ = 0.05;
+  int tsdf_frame_subsampling_ = 1;
+  int color_frame_subsampling_ = 1;
+  int mesh_frame_subsampling_ = 1;
+  int esdf_frame_subsampling_ = 1;
+
+  // ESDF slice params
+  float z_min_ = 0.5f;
+  float z_max_ = 1.0f;
+  float z_slice_ = 0.75f;
+
+  // Mapper - Contains map layers and integrators
+  RgbdMapper mapper_;
+
+  // Output paths
+  std::string timing_output_path_;
+  std::string esdf_output_path_;
+  std::string mesh_output_path_;
+};
+
+}  // namespace experiments
+}  // namespace nvblox
diff --git a/nvblox/experiments/include/nvblox/experiments/integrators/cuda/depth_frame_texture.cuh b/nvblox/experiments/include/nvblox/experiments/integrators/cuda/depth_frame_texture.cuh
new file mode 100644
index 00000000..97384d58
--- /dev/null
+++ b/nvblox/experiments/include/nvblox/experiments/integrators/cuda/depth_frame_texture.cuh
@@ -0,0 +1,38 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <cuda_runtime.h>
+
+#include "nvblox/core/image.h"
+
+namespace nvblox {
+namespace experiments {
+
+class DepthImageTexture {
+ public:
+  DepthImageTexture(const DepthImage& depth_frame,
+                    cudaStream_t transfer_stream = 0);
+  ~DepthImageTexture();
+
+  cudaTextureObject_t texture_object() const { return depth_texture_; }
+
+ private:
+  cudaArray_t depth_array_;
+  cudaTextureObject_t depth_texture_;
+};
+
+}  // namespace experiments
+}  // namespace nvblox
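Not from the patch: the typical call shape for the wrapper above. The constructor uploads the depth image into a cudaArray on `transfer_stream` and builds a texture object over it; the handle is then passed into kernels by value. The kernel here is hypothetical:

    #include "nvblox/experiments/integrators/cuda/depth_frame_texture.cuh"

    __global__ void readPixel(cudaTextureObject_t depth_tex, float u, float v,
                              float* out) {
      *out = tex2D<float>(depth_tex, u, v);
    }

    void launchOnDepthImage(const nvblox::DepthImage& depth_frame,
                            cudaStream_t stream, float* out_device) {
      nvblox::experiments::DepthImageTexture texture(depth_frame, stream);
      // The texture object is a plain handle; pass it to kernels by value.
      readPixel<<<1, 1, 0, stream>>>(texture.texture_object(), 0.5f, 0.5f,
                                     out_device);
      cudaStreamSynchronize(stream);  // The texture must outlive the kernel.
    }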
diff --git a/nvblox/experiments/include/nvblox/experiments/integrators/cuda/experimental_integrator_input_frames.cuh b/nvblox/experiments/include/nvblox/experiments/integrators/cuda/experimental_integrator_input_frames.cuh
new file mode 100644
index 00000000..edad8d7a
--- /dev/null
+++ b/nvblox/experiments/include/nvblox/experiments/integrators/cuda/experimental_integrator_input_frames.cuh
@@ -0,0 +1,91 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <vector>
+
+#include "nvblox/core/camera.h"
+#include "nvblox/core/common_names.h"
+#include "nvblox/core/image.h"
+#include "nvblox/core/types.h"
+
+#include "nvblox/experiments/integrators/cuda/depth_frame_texture.cuh"
+
+namespace nvblox {
+namespace experiments {
+
+class IntegratorInputFrameExperimentsBase {
+ public:
+  IntegratorInputFrameExperimentsBase(
+      const std::vector<Index3D>& block_indices, const DepthImage& depth_frame,
+      const Transform& T_L_C, const Camera& camera,
+      const float truncation_distance_m, const float max_weight,
+      TsdfLayer* layer_ptr, cudaStream_t stream);
+  virtual ~IntegratorInputFrameExperimentsBase();
+
+  // Params for this call
+  const float block_size;
+  const float truncation_distance_m;
+  const float max_weight;
+  const int num_blocks;
+
+  // Device-side inputs
+  Index3D* block_indices_device_ptr;
+  Camera* camera_device_ptr;
+  Eigen::Matrix3f* R_C_L_device_ptr;
+  Eigen::Vector3f* t_C_L_device_ptr;
+  VoxelBlock<TsdfVoxel>** block_device_ptrs;
+
+  // Host-side inputs
+  const Eigen::Matrix3f R_C_L;
+  const Eigen::Vector3f t_C_L;
+  std::vector<VoxelBlock<TsdfVoxel>*> block_ptrs;
+
+  // The stream on which host->device transfers are processed
+  cudaStream_t transfer_stream;
+};
+
+class IntegratorInputFrameExperimentsTexture
+    : public IntegratorInputFrameExperimentsBase {
+ public:
+  IntegratorInputFrameExperimentsTexture(
+      const std::vector<Index3D>& block_indices, const DepthImage& depth_frame,
+      const Transform& T_L_C, const Camera& camera,
+      const float truncation_distance_m, const float max_weight,
+      TsdfLayer* layer_ptr, cudaStream_t stream);
+  virtual ~IntegratorInputFrameExperimentsTexture() {}
+
+  // Texture
+  DepthImageTexture depth_texture;
+};
+
+class IntegratorInputFrameExperimentsGlobal
+    : public IntegratorInputFrameExperimentsBase {
+ public:
+  IntegratorInputFrameExperimentsGlobal(
+      const std::vector<Index3D>& block_indices, const DepthImage& depth_frame,
+      const Transform& T_L_C, const Camera& camera,
+      const float truncation_distance_m, const float max_weight,
+      TsdfLayer* layer_ptr, cudaStream_t stream);
+  virtual ~IntegratorInputFrameExperimentsGlobal() {}
+
+  // Depth frame
+  // Stored as a unified ptr to row-major float memory.
+  const float* depth_frame_unified_ptr;
+  const int depth_frame_rows;
+  const int depth_frame_cols;
+};
+
+}  // namespace experiments
+}  // namespace nvblox
\ No newline at end of file
diff --git a/nvblox/experiments/include/nvblox/experiments/integrators/experimental_projective_tsdf_integrators.h b/nvblox/experiments/include/nvblox/experiments/integrators/experimental_projective_tsdf_integrators.h
new file mode 100644
index 00000000..d59078be
--- /dev/null
+++ b/nvblox/experiments/include/nvblox/experiments/integrators/experimental_projective_tsdf_integrators.h
@@ -0,0 +1,63 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include "nvblox/integrators/projective_tsdf_integrator.h" + +namespace nvblox { +namespace experiments { + +class ProjectiveTsdfIntegratorExperimentsBase + : public ProjectiveTsdfIntegrator { + public: + ProjectiveTsdfIntegratorExperimentsBase(); + virtual ~ProjectiveTsdfIntegratorExperimentsBase(); + + void finish() const; + + protected: +}; + +class ProjectiveTsdfIntegratorExperimentsTexture + : public ProjectiveTsdfIntegratorExperimentsBase { + public: + ProjectiveTsdfIntegratorExperimentsTexture() + : ProjectiveTsdfIntegratorExperimentsBase(){}; + virtual ~ProjectiveTsdfIntegratorExperimentsTexture(){}; + + protected: + void updateBlocks(const std::vector& block_indices, + const DepthImage& depth_frame, const Transform& T_L_C, + const Camera& camera, const float truncation_distance_m, + TsdfLayer* layer) override; +}; + +class ProjectiveTsdfIntegratorExperimentsGlobal + : public ProjectiveTsdfIntegratorExperimentsBase { + public: + ProjectiveTsdfIntegratorExperimentsGlobal() + : ProjectiveTsdfIntegratorExperimentsBase(){}; + virtual ~ProjectiveTsdfIntegratorExperimentsGlobal(){}; + + protected: + void updateBlocks(const std::vector& block_indices, + const DepthImage& depth_frame, const Transform& T_L_C, + const Camera& camera, const float truncation_distance_m, + TsdfLayer* layer) override; +}; + +} // namespace experiments +} // namespace nvblox diff --git a/nvblox/experiments/python/MANIFEST.in b/nvblox/experiments/python/MANIFEST.in new file mode 100644 index 00000000..853ce576 --- /dev/null +++ b/nvblox/experiments/python/MANIFEST.in @@ -0,0 +1,14 @@ +# Main library, including compiled binaries +graft nvblox_experiments + +# Misc +include README.md +include requirements.txt +include setup.py + +# Patterns to exclude from any directory +global-exclude *~ +global-exclude *.pyc +global-exclude *.pyo +global-exclude .git +global-exclude *__pycache__ \ No newline at end of file diff --git a/nvblox/experiments/python/README.md b/nvblox/experiments/python/README.md new file mode 100644 index 00000000..e0933a8a --- /dev/null +++ b/nvblox/experiments/python/README.md @@ -0,0 +1,2 @@ +## nvblox experiments python package +Small python package for helping run and interpret the results of nvblox experiments. 
diff --git a/nvblox/experiments/python/nvblox_experiments/__init__.py b/nvblox/experiments/python/nvblox_experiments/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/nvblox/experiments/python/nvblox_experiments/threedmatch.py b/nvblox/experiments/python/nvblox_experiments/threedmatch.py new file mode 100644 index 00000000..acb79b21 --- /dev/null +++ b/nvblox/experiments/python/nvblox_experiments/threedmatch.py @@ -0,0 +1,37 @@ +import os +import subprocess +from typing import Dict + + +def run_single(executable_path: str, dataset_base_path: str, + timing_output_path: str = None, num_frames: int = None, + flags: Dict = None): + # Launch the 3DMatch reconstruction + args_string = dataset_base_path + if timing_output_path: + args_string += " --timing_output_path " + timing_output_path + if num_frames: + args_string += " --num_frames " + str(num_frames) + if flags: + for key, value in flags.items(): + args_string += " --" + key + " " + str(value) + + run_string = executable_path + ' ' + args_string + print("Running 3DMatch as:\n " + run_string) + subprocess.call(run_string, shell=True) + + +def run_multiple(num_runs: int, executable_path: str, dataset_base_path: str, + timing_output_dir: str = None, num_frames: int = None, + warmup_run: bool = True, flags: Dict = None): + run_indices = list(range(num_runs)) + if warmup_run: + run_indices.insert(0, 0) + for run_idx in run_indices: + print(f"Run: {run_idx}") + + timing_output_name = f"run_{run_idx}.txt" + timing_output_path = os.path.join(timing_output_dir, timing_output_name) + + run_single(executable_path, dataset_base_path, + timing_output_path, num_frames, flags) diff --git a/nvblox/experiments/python/nvblox_experiments/timing.py b/nvblox/experiments/python/nvblox_experiments/timing.py new file mode 100644 index 00000000..27555264 --- /dev/null +++ b/nvblox/experiments/python/nvblox_experiments/timing.py @@ -0,0 +1,29 @@ +import re +import pandas as pd + +def get_timings_as_dataframe(filepath: str): + names = [] + num_calls = [] + total_times = [] + means = [] + stds = [] + mins = [] + maxes = [] + with open(filepath, 'r') as f: + lines = f.readlines() + for line in lines[2:]: + entries = re.split('\s+|,', line) + entries = [entry.strip('()[]') for entry in entries] + + names.append(entries[0]) + num_calls.append(int(entries[1])) + total_times.append(float(entries[2])) + means.append(float(entries[3])) + stds.append(float(entries[5])) + mins.append(float(entries[6])) + maxes.append(float(entries[7])) + + d = {"num_calls": num_calls, "total_time": total_times, + "mean": means, "std": stds, "min": mins, "max": maxes} + timings = pd.DataFrame(d, index=names) + return timings diff --git a/nvblox/experiments/python/requirements.txt b/nvblox/experiments/python/requirements.txt new file mode 100644 index 00000000..1411a4a0 --- /dev/null +++ b/nvblox/experiments/python/requirements.txt @@ -0,0 +1 @@ +pandas \ No newline at end of file diff --git a/nvblox/experiments/python/setup.py b/nvblox/experiments/python/setup.py new file mode 100644 index 00000000..17623ac4 --- /dev/null +++ b/nvblox/experiments/python/setup.py @@ -0,0 +1,17 @@ +import setuptools + +with open('requirements.txt', 'r') as f: + install_requires = [line.strip() for line in f.readlines() if line] + +setuptools.setup( + name="nvblox_experiments", + version="0.0.0", + author="Helen Oleynikova + Alexander Millane", + author_email="holeynikova@nvidia.com + amilllane@nvidia.com", + description="nvblox_experiments: Helper module for nvblox experiments", + 
long_description=open('README.md').read(),
+    long_description_content_type='text/markdown',
+    install_requires=install_requires,
+    include_package_data=True,
+    packages=setuptools.find_packages()
+)
diff --git a/nvblox/experiments/src/common/fuse_3dmatch.cpp b/nvblox/experiments/src/common/fuse_3dmatch.cpp
new file mode 100644
index 00000000..0c61508e
--- /dev/null
+++ b/nvblox/experiments/src/common/fuse_3dmatch.cpp
@@ -0,0 +1,280 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "nvblox/experiments/common/fuse_3dmatch.h"
+
+#include <fstream>
+#include <thread>
+
+#include <glog/logging.h>
+
+#include "nvblox/datasets/parse_3dmatch.h"
+#include "nvblox/io/mesh_io.h"
+#include "nvblox/io/ply_writer.h"
+#include "nvblox/io/pointcloud_io.h"
+#include "nvblox/utils/timing.h"
+
+namespace nvblox {
+namespace experiments {
+
+Fuse3DMatch::Fuse3DMatch(const std::string& base_path,
+                         const std::string& timing_output_path,
+                         const std::string& mesh_output_path,
+                         const std::string& esdf_output_path)
+    : base_path_(base_path),
+      timing_output_path_(timing_output_path),
+      mesh_output_path_(mesh_output_path),
+      esdf_output_path_(esdf_output_path),
+      mapper_(voxel_size_m_) {
+  initializeImageLoaders();
+  // Params
+  mapper_.mesh_integrator().min_weight() = 2.0f;
+  mapper_.color_integrator().max_integration_distance_m(5.0f);
+  mapper_.tsdf_integrator().max_integration_distance_m(5.0f);
+  mapper_.tsdf_integrator().frustum_calculator().raycast_subsampling_factor(4);
+  mapper_.esdf_integrator().max_distance_m() = 4.0f;
+  mapper_.esdf_integrator().min_weight() = 2.0f;
+}
+
+int Fuse3DMatch::run() {
+  LOG(INFO) << "Trying to integrate the first frame: ";
+  if (!integrateFrames()) {
+    LOG(FATAL)
+        << "Failed to integrate first frame.
Please check the file path."; + return 1; + } + + if (!mesh_output_path_.empty()) { + LOG(INFO) << "Outputting mesh ply file to " << mesh_output_path_; + outputMeshPly(); + } + + if (!esdf_output_path_.empty()) { + LOG(INFO) << "Outputting ESDF pointcloud ply file to " << esdf_output_path_; + outputPointcloudPly(); + } + + LOG(INFO) << nvblox::timing::Timing::Print(); + + LOG(INFO) << "Writing timings to file."; + outputTimingsToFile(); + + return 0; +} +void Fuse3DMatch::setVoxelSize(float voxel_size) { voxel_size_m_ = voxel_size; } +void Fuse3DMatch::setTsdfFrameSubsampling(int subsample) { + tsdf_frame_subsampling_ = subsample; +} +void Fuse3DMatch::setColorFrameSubsampling(int subsample) { + color_frame_subsampling_ = subsample; +} +void Fuse3DMatch::setMeshFrameSubsampling(int subsample) { + mesh_frame_subsampling_ = subsample; +} +void Fuse3DMatch::setEsdfFrameSubsampling(int subsample) { + esdf_frame_subsampling_ = subsample; +} + +void Fuse3DMatch::setBasePath(const std::string& base_path) { + base_path_ = base_path; + initializeImageLoaders(); +} + +void Fuse3DMatch::setSequenceNumber(int sequence_num) { + sequence_num_ = sequence_num; + initializeImageLoaders(); +} + +void Fuse3DMatch::initializeImageLoaders(bool multithreaded) { + // NOTE(alexmillane): On my desktop the performance of threaded image loading + // seems to saturate at around 6 threads. On machines with less cores (I have + // 12 physical) presumably the saturation point will be less. + if (multithreaded) { + constexpr unsigned int kMaxLoadingThreads = 6; + unsigned int num_loading_threads = + std::min(kMaxLoadingThreads, std::thread::hardware_concurrency() / 2); + CHECK_GT(num_loading_threads, 0) + << "std::thread::hardware_concurrency() returned 0"; + LOG(INFO) << "Using " << num_loading_threads + << " threads for loading images."; + depth_image_loader_ = + datasets::threedmatch::createMultithreadedDepthImageLoader( + base_path_, sequence_num_, num_loading_threads, + MemoryType::kDevice); + color_image_loader_ = + datasets::threedmatch::createMultithreadedColorImageLoader( + base_path_, sequence_num_, num_loading_threads, + MemoryType::kDevice); + } else { + depth_image_loader_ = datasets::threedmatch::createDepthImageLoader( + base_path_, sequence_num_, MemoryType::kDevice); + color_image_loader_ = datasets::threedmatch::createColorImageLoader( + base_path_, sequence_num_, MemoryType::kDevice); + } +} + +bool Fuse3DMatch::integrateFrame(const int frame_number) { + timing::Timer timer_file("3dmatch/file_loading"); + + // Get the camera for this frame. + timing::Timer timer_file_intrinsics("file_loading/intrinsics"); + Eigen::Matrix3f camera_intrinsics; + if (!datasets::threedmatch::parseCameraFromFile( + datasets::threedmatch::getPathForCameraIntrinsics(base_path_), + &camera_intrinsics)) { + return false; + } + timer_file_intrinsics.Stop(); + + // Load the image into a Depth Frame. + CHECK(depth_image_loader_); + timing::Timer timer_file_depth("file_loading/depth_image"); + DepthImage depth_frame; + if (!depth_image_loader_->getNextImage(&depth_frame)) { + return false; + } + timer_file_depth.Stop(); + + // Load the color image into a ColorImage + CHECK(color_image_loader_); + timing::Timer timer_file_color("file_loading/color_image"); + ColorImage color_frame; + if (!color_image_loader_->getNextImage(&color_frame)) { + return false; + } + timer_file_color.Stop(); + + // Get the transform. 
+  timing::Timer timer_file_camera("file_loading/camera");
+  Transform T_L_C;
+  if (!datasets::threedmatch::parsePoseFromFile(
+          datasets::threedmatch::getPathForFramePose(base_path_, sequence_num_,
+                                                     frame_number),
+          &T_L_C)) {
+    return false;
+  }
+
+  // Rotate the world frame since Y is up in the normal 3DMatch datasets.
+  Eigen::Quaternionf q_L_O =
+      Eigen::Quaternionf::FromTwoVectors(Vector3f(0, 1, 0), Vector3f(0, 0, 1));
+  T_L_C = q_L_O * T_L_C;
+
+  // Create a camera object.
+  int image_width = depth_frame.cols();
+  int image_height = depth_frame.rows();
+  const auto camera = Camera::fromIntrinsicsMatrix(camera_intrinsics,
+                                                   image_width, image_height);
+
+  // Check that the loaded data doesn't contain NaNs or a faulty rotation
+  // matrix. This does occur. If we find one, skip that frame and move to the
+  // next.
+  constexpr float kRotationMatrixDetEpsilon = 1e-4;
+  if (!T_L_C.matrix().allFinite() || !camera_intrinsics.allFinite() ||
+      std::abs(T_L_C.matrix().block<3, 3>(0, 0).determinant() - 1.0f) >
+          kRotationMatrixDetEpsilon) {
+    LOG(WARNING) << "Bad pose or intrinsics data in frame " << frame_number
+                 << "; skipping it.";
+    return true;  // Bad data, but keep going.
+  }
+
+  timer_file_camera.Stop();
+  timer_file.Stop();
+
+  timing::Timer per_frame_timer("3dmatch/time_per_frame");
+  if ((frame_number + 1) % tsdf_frame_subsampling_ == 0) {
+    timing::Timer timer_integrate("3dmatch/integrate_tsdf");
+    mapper_.integrateDepth(depth_frame, T_L_C, camera);
+    timer_integrate.Stop();
+  }
+
+  if ((frame_number + 1) % color_frame_subsampling_ == 0) {
+    timing::Timer timer_integrate_color("3dmatch/integrate_color");
+    mapper_.integrateColor(color_frame, T_L_C, camera);
+    timer_integrate_color.Stop();
+  }
+
+  if ((frame_number + 1) % mesh_frame_subsampling_ == 0) {
+    timing::Timer timer_mesh("3dmatch/mesh");
+    mapper_.updateMesh();
+  }
+
+  if ((frame_number + 1) % esdf_frame_subsampling_ == 0) {
+    timing::Timer timer_integrate_esdf("3dmatch/integrate_esdf");
+    mapper_.updateEsdfSlice(z_min_, z_max_, z_slice_);
+    timer_integrate_esdf.Stop();
+  }
+
+  per_frame_timer.Stop();
+
+  return true;
+}
+
+bool Fuse3DMatch::integrateFrames() {
+  int frame_number = 0;
+  while (frame_number < num_frames_to_integrate_ &&
+         integrateFrame(frame_number++)) {
+    timing::mark("Frame " + std::to_string(frame_number - 1), Color::Red());
+    LOG(INFO) << "Integrating frame " << frame_number - 1;
+  }
+  return true;
+}
+
+bool Fuse3DMatch::outputPointcloudPly() {
+  timing::Timer timer_write("3dmatch/esdf/write");
+  return io::outputVoxelLayerToPly(mapper_.esdf_layer(), esdf_output_path_);
+}
+
+bool Fuse3DMatch::outputMeshPly() {
+  timing::Timer timer_write("3dmatch/mesh/write");
+  return io::outputMeshLayerToPly(mapper_.mesh_layer(), mesh_output_path_);
+}
+bool Fuse3DMatch::outputTimingsToFile() {
+  LOG(INFO) << "Writing timing to: " << timing_output_path_;
+  std::ofstream timing_file(timing_output_path_);
+  timing_file << nvblox::timing::Timing::Print();
+  timing_file.close();
+  return true;
+}
+
+Fuse3DMatch Fuse3DMatch::createFromCommandLineArgs(int argc, char* argv[]) {
+  std::string base_path = "";
+  std::string timing_output_path = "./3dmatch_timings.txt";
+  std::string esdf_output_path = "";
+  std::string mesh_output_path = "";
+  if (argc < 2) {
+    // Try out running on the test datasets.
+    base_path = "../tests/data/3dmatch";
+    LOG(WARNING) << "No 3DMatch file path specified; defaulting to the test "
+                    "directory.";
+  } else {
+    base_path = argv[1];
+    LOG(INFO) << "Loading 3DMatch files from " << base_path;
+  }
+  // Optionally overwrite the output paths.
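+  // Positional layout (everything after the base path is optional):
+  //   fuse_3dmatch <3dmatch_base_path> [timing_output_path]
+  //                [esdf_output_path] [mesh_output_path]
+  // e.g. (paths illustrative only):
+  //   ./fuse_3dmatch ~/data/3dmatch timings.txt esdf.ply mesh.ply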
+  if (argc >= 3) {
+    timing_output_path = argv[2];
+  }
+  if (argc >= 4) {
+    esdf_output_path = argv[3];
+  }
+  if (argc >= 5) {
+    mesh_output_path = argv[4];
+  }
+
+  nvblox::experiments::Fuse3DMatch fuser(base_path, timing_output_path,
+                                         mesh_output_path, esdf_output_path);
+
+  return fuser;
+}
+
+}  // namespace experiments
+}  // namespace nvblox
diff --git a/nvblox/experiments/src/fuse_3dmatch.cpp b/nvblox/experiments/src/fuse_3dmatch.cpp
new file mode 100644
index 00000000..074b2db6
--- /dev/null
+++ b/nvblox/experiments/src/fuse_3dmatch.cpp
@@ -0,0 +1,81 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+
+#include "nvblox/experiments/common/fuse_3dmatch.h"
+
+DECLARE_bool(alsologtostderr);
+
+DEFINE_int32(num_frames, -1,
+             "Number of frames to process. A negative value (the default) "
+             "means process all frames.");
+
+DEFINE_string(timing_output_path, "",
+              "File in which to save the timing results.");
+DEFINE_string(esdf_output_path, "",
+              "File in which to save the ESDF pointcloud.");
+DEFINE_string(mesh_output_path, "", "File in which to save the surface mesh.");
+
+DEFINE_double(voxel_size, 0.0f, "Voxel resolution in meters.");
+DEFINE_int32(tsdf_frame_subsampling, 0,
+             "By what amount to subsample the TSDF frames. A subsample of 3 "
+             "means only every 3rd frame is taken.");
+DEFINE_int32(color_frame_subsampling, 0,
+             "How much to subsample the color integration by.");
+DEFINE_int32(mesh_frame_subsampling, 0,
+             "How much to subsample the meshing by.");
+DEFINE_int32(esdf_frame_subsampling, 0,
+             "How much to subsample the ESDF integration by.");
+
+int main(int argc, char* argv[]) {
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+  google::InitGoogleLogging(argv[0]);
+  FLAGS_alsologtostderr = true;
+  google::InstallFailureSignalHandler();
+
+  nvblox::experiments::Fuse3DMatch fuser =
+      nvblox::experiments::Fuse3DMatch::createFromCommandLineArgs(argc, argv);
+  if (FLAGS_num_frames > 0) {
+    fuser.num_frames_to_integrate_ = FLAGS_num_frames;
+  }
+  if (!FLAGS_timing_output_path.empty()) {
+    fuser.timing_output_path_ = FLAGS_timing_output_path;
+  }
+  if (!FLAGS_esdf_output_path.empty()) {
+    fuser.esdf_output_path_ = FLAGS_esdf_output_path;
+  }
+  if (!FLAGS_mesh_output_path.empty()) {
+    fuser.mesh_output_path_ = FLAGS_mesh_output_path;
+  }
+  if (FLAGS_voxel_size > 0.0f) {
+    fuser.setVoxelSize(static_cast<float>(FLAGS_voxel_size));
+  }
+  if (FLAGS_tsdf_frame_subsampling > 0) {
+    fuser.setTsdfFrameSubsampling(FLAGS_tsdf_frame_subsampling);
+  }
+  if (FLAGS_color_frame_subsampling > 0) {
+    fuser.setColorFrameSubsampling(FLAGS_color_frame_subsampling);
+  }
+  if (FLAGS_mesh_frame_subsampling > 0) {
+    fuser.setMeshFrameSubsampling(FLAGS_mesh_frame_subsampling);
+  }
+  if (FLAGS_esdf_frame_subsampling > 0) {
+    fuser.setEsdfFrameSubsampling(FLAGS_esdf_frame_subsampling);
+  }
+
+  // All configuration above must happen before run() so the layers are
+  // created at the correct resolution.
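+  // Example invocation combining the positional arguments with the flags
+  // above (all values illustrative only):
+  //   ./fuse_3dmatch ~/data/3dmatch \
+  //       --num_frames=100 --voxel_size=0.05 \
+  //       --tsdf_frame_subsampling=1 --color_frame_subsampling=4 \
+  //       --mesh_output_path=mesh.ply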
+ return fuser.run(); +} diff --git a/nvblox/experiments/src/integrators/cuda/depth_frame_texture.cu b/nvblox/experiments/src/integrators/cuda/depth_frame_texture.cu new file mode 100644 index 00000000..7d465c90 --- /dev/null +++ b/nvblox/experiments/src/integrators/cuda/depth_frame_texture.cu @@ -0,0 +1,70 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/experiments/integrators/cuda/depth_frame_texture.cuh" + +#include "nvblox/core/cuda/error_check.cuh" + +namespace nvblox { +namespace experiments { + +DepthImageTexture::DepthImageTexture(const DepthImage& depth_frame, + cudaStream_t transfer_stream) { + // Note(alexmillane): Taken from texture memory example + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#texture-memory + + // Allocate CUDA array in device memory + // Each channel is a 32bit float in the first (x) dimension + const cudaChannelFormatDesc channelDesc = + cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); + checkCudaErrors(cudaMallocArray(&depth_array_, &channelDesc, + depth_frame.width(), depth_frame.height())); + + // Set pitch of the source (the width in memory in bytes of the 2D array + // pointed to by src, including padding), we dont have any padding + const size_t spitch = depth_frame.width() * sizeof(float); + // Copy data located at address h_data in host memory to device memory + checkCudaErrors(cudaMemcpy2DToArrayAsync( + depth_array_, 0, 0, depth_frame.dataConstPtr(), spitch, + depth_frame.width() * sizeof(float), depth_frame.height(), + cudaMemcpyDefault, transfer_stream)); + + // Specify texture + struct cudaResourceDesc resource_description; + memset(&resource_description, 0, sizeof(resource_description)); + resource_description.resType = cudaResourceTypeArray; + resource_description.res.array.array = depth_array_; + + // Specify texture object parameters + struct cudaTextureDesc texture_description; + memset(&texture_description, 0, sizeof(texture_description)); + texture_description.addressMode[0] = cudaAddressModeClamp; + texture_description.addressMode[1] = cudaAddressModeClamp; + texture_description.filterMode = cudaFilterModeLinear; + texture_description.readMode = cudaReadModeElementType; + texture_description.normalizedCoords = 0; + + // Create texture object + checkCudaErrors(cudaCreateTextureObject( + &depth_texture_, &resource_description, &texture_description, NULL)); +} + +DepthImageTexture::~DepthImageTexture() { + cudaDestroyTextureObject(depth_texture_); + cudaFreeArray(depth_array_); +} + +} // namespace experiments +} // namespace nvblox diff --git a/nvblox/experiments/src/integrators/cuda/experimental_integrator_input_frames.cu b/nvblox/experiments/src/integrators/cuda/experimental_integrator_input_frames.cu new file mode 100644 index 00000000..aa29dc45 --- /dev/null +++ b/nvblox/experiments/src/integrators/cuda/experimental_integrator_input_frames.cu @@ -0,0 +1,102 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the 
"License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/experiments/integrators/cuda/experimental_integrator_input_frames.cuh" + +#include "nvblox/integrators/integrators_common.h" + +namespace nvblox { +namespace experiments { + +IntegratorInputFrameExperimentsBase::IntegratorInputFrameExperimentsBase( + const std::vector& block_indices, const DepthImage& depth_frame, + const Transform& T_L_C, const Camera& camera, + const float _truncation_distance_m, const float _max_weight, + TsdfLayer* layer_ptr, cudaStream_t stream) + : block_size(layer_ptr->block_size()), + truncation_distance_m(_truncation_distance_m), + max_weight(_max_weight), + num_blocks(block_indices.size()), + R_C_L(T_L_C.inverse().rotation()), + t_C_L(T_L_C.inverse().translation()), + block_ptrs(getBlockPtrsFromIndices(block_indices, layer_ptr)), + transfer_stream(stream) { + // Allocate GPU memory + checkCudaErrors(cudaMalloc(&R_C_L_device_ptr, sizeof(Eigen::Matrix3f))); + checkCudaErrors(cudaMalloc(&t_C_L_device_ptr, sizeof(Eigen::Vector3f))); + checkCudaErrors(cudaMalloc(&camera_device_ptr, sizeof(Camera))); + checkCudaErrors(cudaMalloc(&block_indices_device_ptr, + block_indices.size() * sizeof(Index3D))); + checkCudaErrors( + cudaMalloc(&block_device_ptrs, + block_indices.size() * sizeof(VoxelBlock*))); + + // Host -> GPU transfers + checkCudaErrors(cudaMemcpyAsync(block_indices_device_ptr, + block_indices.data(), + block_indices.size() * sizeof(Index3D), + cudaMemcpyHostToDevice, transfer_stream)); + checkCudaErrors(cudaMemcpyAsync(R_C_L_device_ptr, R_C_L.data(), + sizeof(Eigen::Matrix3f), + cudaMemcpyHostToDevice, transfer_stream)); + checkCudaErrors(cudaMemcpyAsync(t_C_L_device_ptr, t_C_L.data(), + sizeof(Eigen::Vector3f), + cudaMemcpyHostToDevice, transfer_stream)); + checkCudaErrors(cudaMemcpyAsync(camera_device_ptr, &camera, sizeof(Camera), + cudaMemcpyHostToDevice, transfer_stream)); + checkCudaErrors( + cudaMemcpyAsync(block_device_ptrs, block_ptrs.data(), + block_ptrs.size() * sizeof(VoxelBlock*), + cudaMemcpyHostToDevice, transfer_stream)); +} + +IntegratorInputFrameExperimentsBase::~IntegratorInputFrameExperimentsBase() { + cudaStreamSynchronize(transfer_stream); + checkCudaErrors(cudaFree(block_indices_device_ptr)); + checkCudaErrors(cudaFree(camera_device_ptr)); + checkCudaErrors(cudaFree(R_C_L_device_ptr)); + checkCudaErrors(cudaFree(t_C_L_device_ptr)); + checkCudaErrors(cudaFree(block_device_ptrs)); +} + +IntegratorInputFrameExperimentsTexture::IntegratorInputFrameExperimentsTexture( + const std::vector& block_indices, const DepthImage& depth_frame, + const Transform& T_L_C, const Camera& camera, + const float truncation_distance_m, const float max_weight, + TsdfLayer* layer_ptr, cudaStream_t stream) + : IntegratorInputFrameExperimentsBase(block_indices, depth_frame, T_L_C, + camera, truncation_distance_m, + max_weight, layer_ptr, stream), + depth_texture(depth_frame, stream){ + // The base class handles all the other transfers. 
+};
+
+IntegratorInputFrameExperimentsGlobal::IntegratorInputFrameExperimentsGlobal(
+    const std::vector<Index3D>& block_indices, const DepthImage& depth_frame,
+    const Transform& T_L_C, const Camera& camera,
+    const float truncation_distance_m, const float max_weight,
+    TsdfLayer* layer_ptr, cudaStream_t stream)
+    : IntegratorInputFrameExperimentsBase(block_indices, depth_frame, T_L_C,
+                                          camera, truncation_distance_m,
+                                          max_weight, layer_ptr, stream),
+      depth_frame_unified_ptr(depth_frame.dataConstPtr()),
+      depth_frame_rows(depth_frame.rows()),
+      depth_frame_cols(depth_frame.cols()) {
+  // Send the depth-frame to GPU
+  depth_frame.toGPU();
+};
+
+}  // namespace experiments
+}  // namespace nvblox
diff --git a/nvblox/experiments/src/integrators/cuda/experimental_projective_tsdf_integrators.cu b/nvblox/experiments/src/integrators/cuda/experimental_projective_tsdf_integrators.cu
new file mode 100644
index 00000000..c1b965bd
--- /dev/null
+++ b/nvblox/experiments/src/integrators/cuda/experimental_projective_tsdf_integrators.cu
@@ -0,0 +1,344 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "nvblox/experiments/integrators/experimental_projective_tsdf_integrators.h"
+
+#include "nvblox/experiments/integrators/cuda/experimental_integrator_input_frames.cuh"
+
+namespace nvblox {
+namespace experiments {
+
+__device__ inline float interpolateDepthTexture(
+    cudaTextureObject_t depth_texture, const Eigen::Vector2f& u_px) {
+  return tex2D<float>(depth_texture, u_px.x() + 0.5, u_px.y() + 0.5);
+}
+
+__device__ inline bool interpolateDepthImage(const float* image, int rows,
+                                             int cols, Eigen::Vector2f u_px,
+                                             float* value_ptr) {
+  // If the projected point does not lie on the image plane, fail. (Here "on
+  // the image plane" means having pixel centers surrounding the query point,
+  // i.e. no extrapolation.)
+  if ((u_px.x() < 0.0f) || (u_px.y() < 0.0f) ||
+      (u_px.x() > static_cast<float>(cols) - 1.0f) ||
+      (u_px.y() > static_cast<float>(rows) - 1.0f)) {
+    return false;
+  }
+  // Interpolation on a grid with 1 pixel spacing.
+  // https://en.wikipedia.org/wiki/Bilinear_interpolation#On_the_unit_square
+  // Get the pixel coordinates of the pixel on the low side
+  const Index2D u_low_side_px = u_px.cast<int>();
+  // Get the 4-neighbours values and put them in a matrix
+  // clang-format off
+  const Eigen::Matrix2f value_matrix =
+      (Eigen::Matrix2f() <<
+       image::access(u_low_side_px.y(),     u_low_side_px.x(),     cols, image),
+       image::access(u_low_side_px.y() + 1, u_low_side_px.x(),     cols, image),
+       image::access(u_low_side_px.y(),     u_low_side_px.x() + 1, cols, image),
+       image::access(u_low_side_px.y() + 1, u_low_side_px.x() + 1, cols, image))
+      .finished();
+  // clang-format on
+  // Offset of the requested point to the low side center.
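+  // With a = u_offset.x() and b = u_offset.y() below, the expression
+  // x_vec^T * value_matrix * y_vec expands to the familiar bilinear blend:
+  //   (1-a)(1-b)*f(y0,x0) + (1-a)b*f(y1,x0) + a(1-b)*f(y0,x1) + a*b*f(y1,x1)
+  // where (x0,y0) is u_low_side_px and f(row,col) indexes the depth image.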
+  const Eigen::Vector2f u_offset = u_px - u_low_side_px.cast<float>();
+  const Eigen::Vector2f x_vec(1.0f - u_offset.x(), u_offset.x());
+  const Eigen::Vector2f y_vec(1.0f - u_offset.y(), u_offset.y());
+  *value_ptr = x_vec.transpose() * value_matrix * y_vec;
+  return true;
+}
+
+__global__ void integrateBlocksTextureBasedInterpolation(
+    const Index3D* block_indices_device_ptr, const Camera* camera_device_ptr,
+    cudaTextureObject_t depth_texture, const Eigen::Matrix3f* R_C_L_device_ptr,
+    const Eigen::Vector3f* t_C_L_device_ptr, const float block_size,
+    const float truncation_distance_m, const float max_weight,
+    VoxelBlock<TsdfVoxel>** block_device_ptrs) {
+  // Linear index of thread within block
+  const int thread_index_linear =
+      threadIdx.x + blockDim.x * (threadIdx.y + (blockDim.y * threadIdx.z));
+
+  // Get the data which is common between all threads in a block into shared
+  // memory
+  // TODO(alexmillane): We could also get the camera into shared memory. But
+  // maybe let's profile things first and see what is actually affecting the
+  // performance.
+  __shared__ Eigen::Matrix3f R_C_L;
+  if (thread_index_linear < 9) {
+    R_C_L.data()[thread_index_linear] =
+        R_C_L_device_ptr->data()[thread_index_linear];
+  }
+  __shared__ Eigen::Vector3f t_C_L;
+  if (thread_index_linear >= 9 && thread_index_linear < 12) {
+    t_C_L.data()[thread_index_linear - 9] =
+        t_C_L_device_ptr->data()[thread_index_linear - 9];
+  }
+  __syncthreads();
+
+  // The indices of the voxel this thread will work on
+  // blockIdx.x       - The index of the block we're working on (blockIdx.y/z
+  //                    should be zero)
+  // threadIdx.x/y/z  - The indices of the voxel within the block (we
+  //                    expect the threadBlockDims == voxelBlockDims)
+  const Index3D block_idx = block_indices_device_ptr[blockIdx.x];
+  const Index3D voxel_idx(threadIdx.z, threadIdx.y, threadIdx.x);
+
+  // Get the Voxel we'll update in this thread
+  // NOTE(alexmillane): Note that we've reversed the voxel indexing order such
+  // that adjacent threads (x-major) access adjacent memory locations in the
+  // block (z-major).
+  TsdfVoxel* voxel_ptr =
+      &(block_device_ptrs[blockIdx.x]
+            ->voxels[threadIdx.z][threadIdx.y][threadIdx.x]);
+
+  // Voxel center point
+  const Vector3f p_voxel_center_L = getCenterPostionFromBlockIndexAndVoxelIndex(
+      block_size, block_idx, voxel_idx);
+  // To camera frame
+  const Vector3f p_voxel_center_C = R_C_L * p_voxel_center_L + t_C_L;
+
+  // Project to image plane
+  Eigen::Vector2f u_px;
+  if (!camera_device_ptr->project(p_voxel_center_C, &u_px)) {
+    return;
+  }
+
+  // If the projected point does not lie on the image plane, fail. (Here "on
+  // the image plane" means having pixel centers surrounding the query point,
+  // i.e. no extrapolation.)
+  if ((u_px.x() < 0.0f) || (u_px.y() < 0.0f) ||
+      (u_px.x() > static_cast<float>(camera_device_ptr->width()) - 1.0f) ||
+      (u_px.y() > static_cast<float>(camera_device_ptr->height()) - 1.0f)) {
+    return;
+  }
+
+  // Get the MEASURED depth of the SURFACE, by interpolating the depth image
+  const float surface_depth_measured =
+      interpolateDepthTexture(depth_texture, u_px);
+
+  // Get the MEASURED depth of the VOXEL
+  const float voxel_distance_measured =
+      surface_depth_measured - p_voxel_center_C.z();
+
+  // If we're behind the negative truncation distance, just continue.
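+  // (Sign convention: voxel_distance_measured is positive for voxels between
+  // the camera and the measured surface and negative behind it, so a value
+  // below -truncation_distance_m means the voxel is occluded well behind the
+  // surface and this measurement tells us nothing about it.)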
+  if (voxel_distance_measured < -truncation_distance_m) {
+    return;
+  }
+
+  // Read CURRENT voxel values (from global GPU memory)
+  const float voxel_distance_current = voxel_ptr->distance;
+  const float voxel_weight_current = voxel_ptr->weight;
+
+  // NOTE(alexmillane): We could try to use CUDA math functions to speed up
+  // below
+  // https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__SINGLE.html#group__CUDA__MATH__SINGLE
+
+  // Fuse
+  constexpr float measurement_weight = 1.0f;
+  const float fused_distance = (voxel_distance_measured * measurement_weight +
+                                voxel_distance_current * voxel_weight_current) /
+                               (measurement_weight + voxel_weight_current);
+
+  // Write back to voxel (to global GPU memory)
+  voxel_ptr->distance = fused_distance > 0.0f
+                            ? fmin(truncation_distance_m, fused_distance)
+                            : fmax(-truncation_distance_m, fused_distance);
+  voxel_ptr->weight =
+      fmin(measurement_weight + voxel_weight_current, max_weight);
+}
+
+__global__ void integrateBlocksGlobalBasedInterpolation(
+    const Index3D* block_indices_device_ptr, const Camera* camera_device_ptr,
+    const float* image, int rows, int cols,
+    const Eigen::Matrix3f* R_C_L_device_ptr,
+    const Eigen::Vector3f* t_C_L_device_ptr, const float block_size,
+    const float truncation_distance_m, const float max_weight,
+    VoxelBlock<TsdfVoxel>** block_device_ptrs) {
+  // Linear index of thread within block
+  const int thread_index_linear =
+      threadIdx.x + blockDim.x * (threadIdx.y + (blockDim.y * threadIdx.z));
+
+  // Get the data which is common between all threads in a block into shared
+  // memory
+  // TODO(alexmillane): We could also get the camera into shared memory. But
+  // maybe let's profile things first and see what is actually affecting the
+  // performance.
+  __shared__ Eigen::Matrix3f R_C_L;
+  if (thread_index_linear < 9) {
+    R_C_L.data()[thread_index_linear] =
+        R_C_L_device_ptr->data()[thread_index_linear];
+  }
+  __shared__ Eigen::Vector3f t_C_L;
+  if (thread_index_linear >= 9 && thread_index_linear < 12) {
+    t_C_L.data()[thread_index_linear - 9] =
+        t_C_L_device_ptr->data()[thread_index_linear - 9];
+  }
+  __syncthreads();
+
+  // The indices of the voxel this thread will work on
+  // blockIdx.x       - The index of the block we're working on (blockIdx.y/z
+  //                    should be zero)
+  // threadIdx.x/y/z  - The indices of the voxel within the block (we
+  //                    expect the threadBlockDims == voxelBlockDims)
+  const Index3D block_idx = block_indices_device_ptr[blockIdx.x];
+  const Index3D voxel_idx(threadIdx.z, threadIdx.y, threadIdx.x);
+
+  // Get the Voxel we'll update in this thread
+  // NOTE(alexmillane): Note that we've reversed the voxel indexing order such
+  // that adjacent threads (x-major) access adjacent memory locations in the
+  // block (z-major).
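+  // Concretely: voxels[z][y][x] is laid out with x fastest in memory, and
+  // threadIdx.x is the fastest-varying thread index, so threads whose ids
+  // differ only in threadIdx.x touch consecutive TsdfVoxel structs and the
+  // resulting global-memory accesses coalesce into contiguous segments.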
+  TsdfVoxel* voxel_ptr =
+      &(block_device_ptrs[blockIdx.x]
+            ->voxels[threadIdx.z][threadIdx.y][threadIdx.x]);
+
+  // Voxel center point
+  const Vector3f p_voxel_center_L = getCenterPostionFromBlockIndexAndVoxelIndex(
+      block_size, block_idx, voxel_idx);
+  // To camera frame
+  const Vector3f p_voxel_center_C = R_C_L * p_voxel_center_L + t_C_L;
+
+  // Project to image plane
+  Eigen::Vector2f u_px;
+  if (!camera_device_ptr->project(p_voxel_center_C, &u_px)) {
+    return;
+  }
+
+  // Get the MEASURED depth of the SURFACE, by interpolating the depth image
+  float surface_depth_measured;
+  if (!interpolateDepthImage(image, rows, cols, u_px,
+                             &surface_depth_measured)) {
+    return;
+  }
+
+  // Get the MEASURED depth of the VOXEL
+  const float voxel_distance_measured =
+      surface_depth_measured - p_voxel_center_C.z();
+
+  // If we're behind the negative truncation distance, just continue.
+  if (voxel_distance_measured < -truncation_distance_m) {
+    return;
+  }
+
+  // Read CURRENT voxel values (from global GPU memory)
+  const float voxel_distance_current = voxel_ptr->distance;
+  const float voxel_weight_current = voxel_ptr->weight;
+
+  // NOTE(alexmillane): We could try to use CUDA math functions to speed up
+  // below
+  // https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__SINGLE.html#group__CUDA__MATH__SINGLE
+
+  // Fuse
+  constexpr float measurement_weight = 1.0f;
+  const float fused_distance = (voxel_distance_measured * measurement_weight +
+                                voxel_distance_current * voxel_weight_current) /
+                               (measurement_weight + voxel_weight_current);
+
+  // Write back to voxel (to global GPU memory)
+  voxel_ptr->distance = fused_distance > 0.0f
+                            ? fmin(truncation_distance_m, fused_distance)
+                            : fmax(-truncation_distance_m, fused_distance);
+  voxel_ptr->weight =
+      fmin(measurement_weight + voxel_weight_current, max_weight);
+}
+
+ProjectiveTsdfIntegratorExperimentsBase::
+    ProjectiveTsdfIntegratorExperimentsBase()
+    : ProjectiveTsdfIntegrator() {
+  checkCudaErrors(cudaStreamCreate(&integration_stream_));
+}
+
+ProjectiveTsdfIntegratorExperimentsBase::
+    ~ProjectiveTsdfIntegratorExperimentsBase() {
+  finish();
+  checkCudaErrors(cudaStreamDestroy(integration_stream_));
+}
+
+void ProjectiveTsdfIntegratorExperimentsBase::finish() const {
+  cudaStreamSynchronize(integration_stream_);
+}
+
+void ProjectiveTsdfIntegratorExperimentsTexture::updateBlocks(
+    const std::vector<Index3D>& block_indices, const DepthImage& depth_frame,
+    const Transform& T_L_C, const Camera& camera,
+    const float truncation_distance_m, TsdfLayer* layer_ptr) {
+  CHECK_NOTNULL(layer_ptr);
+
+  // Create an integrator frame
+  // Internally this object starts (asynchronous) transfers of its inputs to
+  // device memory. Kernels launched on the passed stream can therefore
+  // utilize the input frame's device-side members.
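+  // The flow below is: (1) constructing the input frame enqueues the
+  // host-to-device copies on integration_stream_, (2) the kernel is launched
+  // on that same stream, so stream ordering guarantees the copies complete
+  // first, and (3) finish() blocks the host until the stream drains.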
+  const IntegratorInputFrameExperimentsTexture input(
+      block_indices, depth_frame, T_L_C, camera, truncation_distance_m,
+      max_weight_, layer_ptr, integration_stream_);
+
+  // Kernel call - One ThreadBlock launched per VoxelBlock
+  constexpr int kVoxelsPerSide = VoxelBlock<TsdfVoxel>::kVoxelsPerSide;
+  const dim3 kThreadsPerBlock(kVoxelsPerSide, kVoxelsPerSide, kVoxelsPerSide);
+  const int num_blocks = input.num_blocks;
+  // clang-format off
+  integrateBlocksTextureBasedInterpolation<<<num_blocks, kThreadsPerBlock, 0, integration_stream_>>>(
+      input.block_indices_device_ptr,
+      input.camera_device_ptr,
+      input.depth_texture.texture_object(),
+      input.R_C_L_device_ptr,
+      input.t_C_L_device_ptr,
+      input.block_size,
+      input.truncation_distance_m,
+      input.max_weight,
+      input.block_device_ptrs);
+  // clang-format on
+  checkCudaErrors(cudaPeekAtLastError());
+
+  // Finish processing of the frame before returning control
+  finish();
+}
+
+void ProjectiveTsdfIntegratorExperimentsGlobal::updateBlocks(
+    const std::vector<Index3D>& block_indices, const DepthImage& depth_frame,
+    const Transform& T_L_C, const Camera& camera,
+    const float truncation_distance_m, TsdfLayer* layer_ptr) {
+  CHECK_NOTNULL(layer_ptr);
+
+  // Create an integrator frame
+  // Internally this object starts (asynchronous) transfers of its inputs to
+  // device memory. Kernels launched on the passed stream can therefore
+  // utilize the input frame's device-side members.
+  const IntegratorInputFrameExperimentsGlobal input(
+      block_indices, depth_frame, T_L_C, camera, truncation_distance_m,
+      max_weight_, layer_ptr, integration_stream_);
+
+  // Kernel call - One ThreadBlock launched per VoxelBlock
+  constexpr int kVoxelsPerSide = VoxelBlock<TsdfVoxel>::kVoxelsPerSide;
+  const dim3 kThreadsPerBlock(kVoxelsPerSide, kVoxelsPerSide, kVoxelsPerSide);
+  const int num_blocks = input.num_blocks;
+  // clang-format off
+  integrateBlocksGlobalBasedInterpolation<<<num_blocks, kThreadsPerBlock, 0, integration_stream_>>>(
+      input.block_indices_device_ptr,
+      input.camera_device_ptr,
+      input.depth_frame_unified_ptr,
+      input.depth_frame_rows,
+      input.depth_frame_cols,
+      input.R_C_L_device_ptr,
+      input.t_C_L_device_ptr,
+      input.block_size,
+      input.truncation_distance_m,
+      input.max_weight,
+      input.block_device_ptrs);
+  // clang-format on
+  checkCudaErrors(cudaPeekAtLastError());
+
+  // Finish processing of the frame before returning control
+  finish();
+}
+
+}  // namespace experiments
+}  // namespace nvblox
diff --git a/nvblox/include/nvblox/core/accessors.h b/nvblox/include/nvblox/core/accessors.h
new file mode 100644
index 00000000..0c3046a0
--- /dev/null
+++ b/nvblox/include/nvblox/core/accessors.h
@@ -0,0 +1,99 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ +#pragma once + +#include + +#include "nvblox/core/blox.h" +#include "nvblox/core/layer.h" +#include "nvblox/core/types.h" +#include "nvblox/core/voxels.h" + +namespace nvblox { + +// Get Block +template +bool getBlockAtPosition(const BlockLayer& layer, + const Eigen::Vector3f& position, + const BlockType** data); + +// Get Voxel +template +bool getVoxelAtPosition(const BlockLayer>& layer, + const Eigen::Vector3f& position, + const VoxelType** data); + +// Accessors for voxelized types +template +const VoxelType* getVoxelAtBlockAndVoxelIndex( + const BlockLayer>& layer, const Index3D& block_index, + const Index3D& voxel_index); + +// Accessors for calling a function on all voxels in a layer +template +using ConstVoxelCallbackFunction = + std::function; + +template +using VoxelCallbackFunction = std::function; + +template +void callFunctionOnAllVoxels(const BlockLayer>& layer, + ConstVoxelCallbackFunction callback); + +template +void callFunctionOnAllVoxels(BlockLayer>* layer, + VoxelCallbackFunction callback); + +// Accessors for calling a function on all voxels in a block +template +using ConstBlockCallbackFunction = + std::function; + +template +using BlockCallbackFunction = + std::function; + +template +void callFunctionOnAllVoxels(const VoxelBlock& block, + ConstBlockCallbackFunction callback); + +template +void callFunctionOnAllVoxels(VoxelBlock* block, + BlockCallbackFunction callback); + +// Accessors for calling a function on all blocks in a layer +template +using ConstBlockCallbackFunction = + std::function; + +template +using BlockCallbackFunction = + std::function; + +template +void callFunctionOnAllBlocks(const BlockLayer& layer, + ConstBlockCallbackFunction callback); + +template +void callFunctionOnAllBlocks(BlockLayer* layer, + BlockCallbackFunction callback); + +} // namespace nvblox + +#include "nvblox/core/impl/accessors_impl.h" diff --git a/nvblox/include/nvblox/core/blox.h b/nvblox/include/nvblox/core/blox.h new file mode 100644 index 00000000..d3fad0a2 --- /dev/null +++ b/nvblox/include/nvblox/core/blox.h @@ -0,0 +1,55 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#pragma once + +#include + +#include "nvblox/core/unified_ptr.h" +#include "nvblox/core/voxels.h" + +namespace nvblox { + +template +struct VoxelBlock { + typedef unified_ptr Ptr; + typedef unified_ptr ConstPtr; + + static constexpr size_t kVoxelsPerSide = 8; + VoxelType voxels[kVoxelsPerSide][kVoxelsPerSide][kVoxelsPerSide]; + + static Ptr allocate(MemoryType memory_type); + static void initOnGPU(VoxelBlock* block_ptr); +}; + +struct FreespaceBlock { + typedef unified_ptr Ptr; + typedef unified_ptr ConstPtr; + + bool free = true; +}; + +// Multires Block + +// Reference Block + +// Initialization Utility Functions +template +void setBlockBytesZeroOnGPU(BlockType* block_device_ptr); +void setColorBlockGrayOnGPU(VoxelBlock* block_device_ptr); + +} // namespace nvblox + +#include "nvblox/core/impl/blox_impl.h" diff --git a/nvblox/include/nvblox/core/bounding_boxes.h b/nvblox/include/nvblox/core/bounding_boxes.h new file mode 100644 index 00000000..6f1e9cd4 --- /dev/null +++ b/nvblox/include/nvblox/core/bounding_boxes.h @@ -0,0 +1,46 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include "nvblox/core/common_names.h" +#include "nvblox/core/layer.h" +#include "nvblox/core/types.h" + +namespace nvblox { + +inline std::vector getBlockIndicesTouchedByBoundingBox( + const float block_size, const AxisAlignedBoundingBox& aabb); + +inline AxisAlignedBoundingBox getAABBOfBlock(const float block_size, + const Index3D& block_index); + +template +AxisAlignedBoundingBox getAABBOfAllocatedBlocks( + const BlockLayer& layer); + +template +std::vector getAllocatedBlocksWithinAABB( + const BlockLayer& layer, const AxisAlignedBoundingBox& aabb); + +AxisAlignedBoundingBox getAABBOfObservedVoxels(const EsdfLayer& layer); +AxisAlignedBoundingBox getAABBOfObservedVoxels(const TsdfLayer& layer, + const float min_weight = 1e-4); +AxisAlignedBoundingBox getAABBOfObservedVoxels(const ColorLayer& layer, + const float min_weight = 1e-4); + +} // namespace nvblox + +#include "nvblox/core/impl/bounding_boxes_impl.h" diff --git a/nvblox/include/nvblox/core/camera.h b/nvblox/include/nvblox/core/camera.h new file mode 100644 index 00000000..39722901 --- /dev/null +++ b/nvblox/include/nvblox/core/camera.h @@ -0,0 +1,130 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include "nvblox/core/types.h" + +namespace nvblox { + +class Frustum; +class BoundingPlane; + +/// Class that describes the parameters and FoV of a camera. 
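+/// For a pinhole model like this one, project() maps a camera-frame point
+/// (x, y, z) with z > 0 to image-plane coordinates, roughly:
+///   u = fu * x / z + cu,  v = fv * y / z + cv.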
+class Camera { + public: + __host__ __device__ inline Camera(float fu, float fv, float cu, float cv, + int width, int height); + __host__ __device__ inline Camera(float fu, float fv, int width, int height); + + __host__ __device__ inline bool project(const Vector3f& p_C, + Vector2f* u_C) const; + + /// Get the axis aligned bounding box of the view in the LAYER coordinate + /// frame. + __host__ AxisAlignedBoundingBox getViewAABB(const Transform& T_L_C, + const float min_depth, + const float max_depth) const; + + __host__ Frustum getViewFrustum(const Transform& T_L_C, const float min_depth, + const float max_depth) const; + + /// Gets the view corners in the CAMERA coordinate frame. + __host__ Eigen::Matrix getViewCorners( + const float min_depth, const float max_depth) const; + + // Returns an unnormalized ray direction in the camera frame corresponding to + // the passed pixel. + // Two functions, one for (floating point) image-plane coordinates, another + // function for pixel indices. + __host__ __device__ inline Vector3f rayFromImagePlaneCoordinates( + const Vector2f& u_C) const; + __host__ __device__ inline Vector3f rayFromPixelIndices( + const Index2D& u_C) const; + + // Accessors + __host__ __device__ inline float fu() const { return fu_; } + __host__ __device__ inline float fv() const { return fv_; } + __host__ __device__ inline float cu() const { return cu_; } + __host__ __device__ inline float cv() const { return cv_; } + __host__ __device__ inline int width() const { return width_; } + __host__ __device__ inline int height() const { return height_; } + __host__ __device__ inline int cols() const { return width_; } + __host__ __device__ inline int rows() const { return height_; } + + // Factories + inline static Camera fromIntrinsicsMatrix(const Eigen::Matrix3f& mat, + int width, int height); + + private: + const float fu_; + const float fv_; + const float cu_; + const float cv_; + + const int width_; + const int height_; +}; + +/// A bounding plane which has one "inside" direction and the other direction is +/// "outside." Quick tests for which side of the plane you are on. +class BoundingPlane { + public: + BoundingPlane() : normal_(Vector3f::Identity()), distance_(0.0f) {} + + void setFromPoints(const Vector3f& p1, const Vector3f& p2, + const Vector3f& p3); + void setFromDistanceNormal(const Vector3f& normal, float distance); + + /// Is the point on the correct side of the bounding plane? + bool isPointInside(const Vector3f& point) const; + + const Vector3f& normal() const { return normal_; } + float distance() const { return distance_; } + + private: + Vector3f normal_; + float distance_; +}; + +/// Class that allows checking for whether objects are within the field of view +/// of a camera or not. +class Frustum { + public: + // Frustum must be initialized with a camera and min and max depth and pose. + Frustum(const Camera& camera, const Transform& T_L_C, float min_depth, + float max_depth); + + AxisAlignedBoundingBox getAABB() const { return aabb_; } + + bool isPointInView(const Vector3f& point) const; + bool isAABBInView(const AxisAlignedBoundingBox& aabb) const; + + private: + // Helper functions to do the actual computations. + void computeBoundingPlanes(const Eigen::Matrix& corners_C, + const Transform& T_L_C); + + /// Bounding planes containing around the frustum. Expressed in the layer + /// coordinate frame. 
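+  /// (One plane per frustum face: near, far, left, right, top and bottom. A
+  /// point lies inside the frustum exactly when isPointInside() holds for all
+  /// of them.)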
+ std::array bounding_planes_L_; + + /// Cached AABB of the + AxisAlignedBoundingBox aabb_; +}; + +} // namespace nvblox + +#include "nvblox/core/impl/camera_impl.h" diff --git a/nvblox/include/nvblox/core/color.h b/nvblox/include/nvblox/core/color.h new file mode 100644 index 00000000..b5d84d7d --- /dev/null +++ b/nvblox/include/nvblox/core/color.h @@ -0,0 +1,55 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include +#include +#include + +namespace nvblox { + +struct Color { + __host__ __device__ Color() : r(0), g(0), b(0) {} + __host__ __device__ Color(uint8_t _r, uint8_t _g, uint8_t _b) + : r(_r), g(_g), b(_b) {} + + uint8_t r; + uint8_t g; + uint8_t b; + + bool operator==(const Color& other) const { + return (r == other.r) && (g == other.g) && (b == other.b); + } + + // Static functions for working with colors + static Color blendTwoColors(const Color& first_color, float first_weight, + const Color& second_color, float second_weight); + + // Now a bunch of static colors to use! :) + static const Color White() { return Color(255, 255, 255); } + static const Color Black() { return Color(0, 0, 0); } + static const Color Gray() { return Color(127, 127, 127); } + static const Color Red() { return Color(255, 0, 0); } + static const Color Green() { return Color(0, 255, 0); } + static const Color Blue() { return Color(0, 0, 255); } + static const Color Yellow() { return Color(255, 255, 0); } + static const Color Orange() { return Color(255, 127, 0); } + static const Color Purple() { return Color(127, 0, 255); } + static const Color Teal() { return Color(0, 255, 255); } + static const Color Pink() { return Color(255, 0, 127); } +}; + +} // namespace nvblox diff --git a/nvblox/include/nvblox/core/common_names.h b/nvblox/include/nvblox/core/common_names.h new file mode 100644 index 00000000..646d85ff --- /dev/null +++ b/nvblox/include/nvblox/core/common_names.h @@ -0,0 +1,34 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+#pragma once
+
+#include "nvblox/core/blox.h"
+#include "nvblox/core/layer.h"
+#include "nvblox/core/types.h"
+#include "nvblox/core/voxels.h"
+#include "nvblox/mesh/mesh_block.h"
+
+namespace nvblox {
+
+using TsdfBlock = VoxelBlock<TsdfVoxel>;
+using TsdfLayer = VoxelBlockLayer<TsdfVoxel>;
+using EsdfBlock = VoxelBlock<EsdfVoxel>;
+using EsdfLayer = VoxelBlockLayer<EsdfVoxel>;
+using ColorBlock = VoxelBlock<ColorVoxel>;
+using ColorLayer = VoxelBlockLayer<ColorVoxel>;
+using MeshLayer = BlockLayer<MeshBlock>;
+
+}  // namespace nvblox
diff --git a/nvblox/include/nvblox/core/cuda/error_check.cuh b/nvblox/include/nvblox/core/cuda/error_check.cuh
new file mode 100644
index 00000000..208f7c15
--- /dev/null
+++ b/nvblox/include/nvblox/core/cuda/error_check.cuh
@@ -0,0 +1,25 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+namespace nvblox {
+
+void check_cuda(cudaError_t result, char const* const func,
+                const char* const file, int const line);
+
+#define checkCudaErrors(val) check_cuda((val), #val, __FILE__, __LINE__)
+
+}  // namespace nvblox
diff --git a/nvblox/include/nvblox/core/cuda/image_cuda.h b/nvblox/include/nvblox/core/cuda/image_cuda.h
new file mode 100644
index 00000000..d43cea03
--- /dev/null
+++ b/nvblox/include/nvblox/core/cuda/image_cuda.h
@@ -0,0 +1,37 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <utility>
+
+namespace nvblox {
+namespace cuda {
+
+template <typename T>
+void copy(const int rows, const int cols, const T* from, T* to);
+
+template <typename T>
+void toGPU(const T* data, const int rows, const int cols);
+
+float max(const int rows, const int cols, const float* image);
+float min(const int rows, const int cols, const float* image);
+std::pair<float, float> minmax(const int rows, const int cols,
+                               const float* image);
+
+}  // namespace cuda
+}  // namespace nvblox
+
+#include "nvblox/core/cuda/impl/image_cuda_impl.cuh"
diff --git a/nvblox/include/nvblox/core/cuda/impl/image_cuda_impl.cuh b/nvblox/include/nvblox/core/cuda/impl/image_cuda_impl.cuh
new file mode 100644
index 00000000..673e6f5b
--- /dev/null
+++ b/nvblox/include/nvblox/core/cuda/impl/image_cuda_impl.cuh
@@ -0,0 +1,37 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include "nvblox/core/cuda/error_check.cuh" + +namespace nvblox { +namespace cuda { + +template +void copy(const int rows, const int cols, const T* from, T* to) { + checkCudaErrors( + cudaMemcpy(to, from, rows * cols * sizeof(T), cudaMemcpyDefault)); +} + +template +void toGPU(const T* image, const int rows, const int cols) { + int device; + checkCudaErrors(cudaGetDevice(&device)); + checkCudaErrors(cudaMemPrefetchAsync(image, rows * cols * sizeof(T), device)); +} + +} // namespace cuda +} // namespace nvblox diff --git a/nvblox/include/nvblox/core/cuda/warmup.h b/nvblox/include/nvblox/core/cuda/warmup.h new file mode 100644 index 00000000..34e50acb --- /dev/null +++ b/nvblox/include/nvblox/core/cuda/warmup.h @@ -0,0 +1,23 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +namespace nvblox { + +// Little function that warms up the CUDA drivers. +void warmupCuda(); + +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/include/nvblox/core/hash.h b/nvblox/include/nvblox/core/hash.h new file mode 100644 index 00000000..00228141 --- /dev/null +++ b/nvblox/include/nvblox/core/hash.h @@ -0,0 +1,68 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include +#include +#include +#include + +#include "nvblox/core/types.h" + +namespace nvblox { + +/** + * Performs deco hashing on block indexes. Based on recommendations of + * "Investigating the impact of Suboptimal Hashing Functions" by L. Buckley et + * al. 
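+ * The hash treats the index as a base-sl number with an arbitrarily chosen
+ * large multiplier sl, i.e. roughly h(i) = i.x + i.y * sl + i.z * sl^2
+ * (see operator() below).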
+ */
+struct Index3DHash {
+  /// The number was arbitrarily chosen with no good justification.
+  static constexpr size_t sl = 17191;
+  static constexpr size_t sl2 = sl * sl;
+
+  __host__ __device__ std::size_t operator()(const Index3D& index) const {
+    return static_cast<std::size_t>(index.x() + index.y() * sl +
+                                    index.z() * sl2);
+  }
+};
+
+template <typename T>
+struct VectorCompare {
+  __host__ __device__ inline bool operator()(const T& p_1,
+                                             const T& p_2) const {
+    if (p_1.x() != p_2.x()) {
+      return p_1.x() < p_2.x();
+    }
+    if (p_1.y() != p_2.y()) {
+      return p_1.y() < p_2.y();
+    }
+    return p_1.z() < p_2.z();
+  };
+};
+
+template <typename ValueType>
+struct Index3DHashMapType {
+  typedef std::unordered_map<
+      Index3D, ValueType, Index3DHash, std::equal_to<Index3D>,
+      Eigen::aligned_allocator<std::pair<const Index3D, ValueType>>>
+      type;
+};
+
+typedef std::unordered_set<Index3D, Index3DHash, std::equal_to<Index3D>,
+                           Eigen::aligned_allocator<Index3D>>
+    Index3DSet;
+
+}  // namespace nvblox
diff --git a/nvblox/include/nvblox/core/image.h b/nvblox/include/nvblox/core/image.h
new file mode 100644
index 00000000..09597167
--- /dev/null
+++ b/nvblox/include/nvblox/core/image.h
@@ -0,0 +1,152 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include
+
+#include "nvblox/core/color.h"
+#include "nvblox/core/types.h"
+#include "nvblox/core/unified_ptr.h"
+
+namespace nvblox {
+
+// Row-major access to images (on host and device)
+namespace image {
+
+template <typename ElementType>
+__host__ __device__ inline ElementType access(int row_idx, int col_idx,
+                                              int cols,
+                                              const ElementType* data) {
+  return data[row_idx * cols + col_idx];
+}
+template <typename ElementType>
+__host__ __device__ inline ElementType& access(int row_idx, int col_idx,
+                                               int cols, ElementType* data) {
+  return data[row_idx * cols + col_idx];
+}
+template <typename ElementType>
+__host__ __device__ inline ElementType access(int linear_idx,
+                                              const ElementType* data) {
+  return data[linear_idx];
+}
+template <typename ElementType>
+__host__ __device__ inline ElementType& access(int linear_idx,
+                                               ElementType* data) {
+  return data[linear_idx];
+}
+
+}  // namespace image
+
+// Row-major image.
+// - Note that in a row-major image, rows follow one another in linear memory,
+//   which means the column index varies between subsequent elements.
+// - Images use corner based indexing such that the pixel with index (0,0) is
+//   centered at (0.5px,0.5px) and spans (0.0px,0.0px) to (1.0px,1.0px).
+// - Points on the image plane are defined as: u_px = (u_px.x(), u_px.y()) =
+//   (col_idx, row_idx) in pixels.
+
+constexpr MemoryType kDefaultImageMemoryType = MemoryType::kDevice;
+
+template <typename _ElementType>
+class Image {
+ public:
+  // "Save" the element type so it's queryable as Image::ElementType.
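+  // This lets generic code name the pixel type, e.g. (illustrative only):
+  //   template <typename ImageType>
+  //   typename ImageType::ElementType firstPixel(const ImageType& image) {
+  //     return image(0, 0);
+  //   }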
+ typedef _ElementType ElementType; + + Image(int rows, int cols, MemoryType memory_type = kDefaultImageMemoryType) + : rows_(rows), + cols_(cols), + memory_type_(memory_type), + data_(make_unified(static_cast(rows * cols), + memory_type_)) {} + Image(MemoryType memory_type = kDefaultImageMemoryType) + : rows_(0), cols_(0), memory_type_(memory_type) {} + Image(Image&& other); + Image& operator=(Image&& other); + + // Deep copy constructor (second can be used to transition memory type) + Image(const Image& other); + Image(const Image& other, MemoryType memory_type); + Image& operator=(const Image& other); + + // Prefetch the data to the gpu (only makes sense for kUnified images) + void toGPU() const; + + // Attributes + inline int cols() const { return cols_; } + inline int rows() const { return rows_; } + inline int numel() const { return cols_ * rows_; } + inline int width() const { return cols_; } + inline int height() const { return rows_; } + inline MemoryType memory_type() const { return memory_type_; } + + // Access + // NOTE(alexmillane): The guard-rails are off here. If you declare a kDevice + // Image and try to access its data, you will get undefined behaviour. + inline ElementType operator()(const int row_idx, const int col_idx) const { + return image::access(row_idx, col_idx, cols_, data_.get()); + } + inline ElementType& operator()(const int row_idx, const int col_idx) { + return image::access(row_idx, col_idx, cols_, data_.get()); + } + inline ElementType operator()(const int linear_idx) const { + return image::access(linear_idx, data_.get()); + } + inline ElementType& operator()(const int linear_idx) { + return image::access(linear_idx, data_.get()); + } + + // Raw pointer access + inline ElementType* dataPtr() { return data_.get(); } + inline const ElementType* dataConstPtr() const { return data_.get(); } + + // Reset the contents of the image. Reallocate if the image got larger. + void populateFromBuffer(int rows, int cols, const ElementType* buffer, + MemoryType memory_type = kDefaultImageMemoryType); + + // Factories + static inline Image fromBuffer( + int rows, int cols, const ElementType* buffer, + MemoryType memory_type = kDefaultImageMemoryType); + + protected: + int rows_; + int cols_; + MemoryType memory_type_; + unified_ptr data_; +}; + +using DepthImage = Image; +using ColorImage = Image; + +// Image Reductions +namespace image { + +float max(const DepthImage& image); +float min(const DepthImage& image); +std::pair minmax(const DepthImage& image); +float maxGPU(const DepthImage& image); +float minGPU(const DepthImage& image); +std::pair minmaxGPU(const DepthImage& image); +float maxCPU(const DepthImage& image); +float minCPU(const DepthImage& image); +std::pair minmaxCPU(const DepthImage& image); + +} // namespace image + +} // namespace nvblox + +#include "nvblox/core/impl/image_impl.h" diff --git a/nvblox/include/nvblox/core/impl/accessors_impl.h b/nvblox/include/nvblox/core/impl/accessors_impl.h new file mode 100644 index 00000000..79f852c3 --- /dev/null +++ b/nvblox/include/nvblox/core/impl/accessors_impl.h @@ -0,0 +1,201 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include "nvblox/core/indexing.h" +#include "nvblox/core/layer.h" +#include "nvblox/core/types.h" +#include "nvblox/core/voxels.h" + +namespace nvblox { + +template +bool getBlockAtPosition(const BlockLayer& layer, + const Eigen::Vector3f& position, + const BlockType** data) { + typename BlockType::ConstPtr block_ptr = layer.getBlockAtPosition(position); + if (block_ptr != nullptr) { + *data = block_ptr.get(); + return true; + } + return false; +} + +template +bool getVoxelAtPosition(const BlockLayer>& layer, + const Eigen::Vector3f& position, + const VoxelType** data) { + Index3D block_idx; + Index3D voxel_idx; + getBlockAndVoxelIndexFromPositionInLayer(layer.block_size(), position, + &block_idx, &voxel_idx); + const typename VoxelBlock::ConstPtr block_ptr = + layer.getBlockAtIndex(block_idx); + if (block_ptr) { + *data = &block_ptr->voxels[voxel_idx.x()][voxel_idx.y()][voxel_idx.z()]; + return true; + } + return false; +} + +template +void callFunctionOnAllVoxels(const BlockLayer>& layer, + ConstVoxelCallbackFunction callback) { + std::vector block_indices = layer.getAllBlockIndices(); + + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + + bool clone = (layer.memory_type() == MemoryType::kDevice); + + // Iterate over all the blocks: + for (const Index3D& block_index : block_indices) { + Index3D voxel_index; + typename VoxelBlock::ConstPtr block; + if (clone) { + block = layer.getBlockAtIndex(block_index).clone(MemoryType::kHost); + } else { + block = layer.getBlockAtIndex(block_index); + } + if (!block) { + continue; + } + // Iterate over all the voxels: + for (voxel_index.x() = 0; voxel_index.x() < kVoxelsPerSide; + voxel_index.x()++) { + for (voxel_index.y() = 0; voxel_index.y() < kVoxelsPerSide; + voxel_index.y()++) { + for (voxel_index.z() = 0; voxel_index.z() < kVoxelsPerSide; + voxel_index.z()++) { + // Get the voxel and call the callback on it: + const VoxelType* voxel = + &block->voxels[voxel_index.x()][voxel_index.y()][voxel_index.z()]; + callback(block_index, voxel_index, voxel); + } + } + } + } +} + +template +void callFunctionOnAllVoxels(BlockLayer>* layer, + VoxelCallbackFunction callback) { + std::vector block_indices = layer->getAllBlockIndices(); + + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + + // Iterate over all the blocks: + for (const Index3D& block_index : block_indices) { + Index3D voxel_index; + typename VoxelBlock::Ptr block = + layer->getBlockAtIndex(block_index); + if (!block) { + continue; + } + // Iterate over all the voxels: + for (voxel_index.x() = 0; voxel_index.x() < kVoxelsPerSide; + voxel_index.x()++) { + for (voxel_index.y() = 0; voxel_index.y() < kVoxelsPerSide; + voxel_index.y()++) { + for (voxel_index.z() = 0; voxel_index.z() < kVoxelsPerSide; + voxel_index.z()++) { + // Get the voxel and call the callback on it: + VoxelType* voxel = + &block->voxels[voxel_index.x()][voxel_index.y()][voxel_index.z()]; + callback(block_index, voxel_index, voxel); + } + } + } + } +} + +template +const VoxelType* getVoxelAtBlockAndVoxelIndex( + const BlockLayer>& layer, const Index3D& block_index, + const Index3D& voxel_index) 
{ + typename VoxelBlock::ConstPtr block = + layer.getBlockAtIndex(block_index); + if (!block) { + return nullptr; + } + // TODO: add DCHECKS + return &block->voxels[voxel_index.x()][voxel_index.y()][voxel_index.z()]; +} + +template +void callFunctionOnAllVoxels(const VoxelBlock& block, + ConstBlockCallbackFunction callback) { + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + // Iterate over all the voxels: + Index3D voxel_index; + for (voxel_index.x() = 0; voxel_index.x() < kVoxelsPerSide; + voxel_index.x()++) { + for (voxel_index.y() = 0; voxel_index.y() < kVoxelsPerSide; + voxel_index.y()++) { + for (voxel_index.z() = 0; voxel_index.z() < kVoxelsPerSide; + voxel_index.z()++) { + // Get the voxel and call the callback on it: + const VoxelType* voxel = + &block.voxels[voxel_index.x()][voxel_index.y()][voxel_index.z()]; + callback(voxel_index, voxel); + } + } + } +} + +template +void callFunctionOnAllVoxels(VoxelBlock* block, + BlockCallbackFunction callback) { + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + // Iterate over all the voxels: + Index3D voxel_index; + for (voxel_index.x() = 0; voxel_index.x() < kVoxelsPerSide; + voxel_index.x()++) { + for (voxel_index.y() = 0; voxel_index.y() < kVoxelsPerSide; + voxel_index.y()++) { + for (voxel_index.z() = 0; voxel_index.z() < kVoxelsPerSide; + voxel_index.z()++) { + // Get the voxel and call the callback on it: + VoxelType* voxel = + &block->voxels[voxel_index.x()][voxel_index.y()][voxel_index.z()]; + callback(voxel_index, voxel); + } + } + } +} + +template +void callFunctionOnAllBlocks(const BlockLayer& layer, + ConstBlockCallbackFunction callback) { + const std::vector block_indices = layer.getAllBlockIndices(); + for (const Index3D& block_index : block_indices) { + typename BlockType::ConstPtr block = layer.getBlockAtIndex(block_index); + CHECK(block); + callback(block_index, block.get()); + } +} + +template +void callFunctionOnAllBlocks(BlockLayer* layer, + BlockCallbackFunction callback) { + std::vector block_indices = layer->getAllBlockIndices(); + for (const Index3D& block_index : block_indices) { + typename BlockType::Ptr block = layer->getBlockAtIndex(block_index); + CHECK(block); + callback(block_index, block.get()); + } +} + +} // namespace nvblox diff --git a/nvblox/include/nvblox/core/impl/blox_impl.h b/nvblox/include/nvblox/core/impl/blox_impl.h new file mode 100644 index 00000000..6bc4532d --- /dev/null +++ b/nvblox/include/nvblox/core/impl/blox_impl.h @@ -0,0 +1,50 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#pragma once + +namespace nvblox { + +template +typename VoxelBlock::Ptr VoxelBlock::allocate( + MemoryType memory_type) { + Ptr voxel_block_ptr = make_unified(memory_type); + if (memory_type == MemoryType::kDevice) { + initOnGPU(voxel_block_ptr.get()); + } + return voxel_block_ptr; +} + +template +void VoxelBlock::initOnGPU(VoxelBlock* block_ptr) { + setBlockBytesZeroOnGPU(block_ptr); +} + +// Initialization specialization for ColorVoxel which is initialized to gray +// with zero weight +template <> +inline void VoxelBlock::initOnGPU( + VoxelBlock* block_ptr) { + setColorBlockGrayOnGPU(block_ptr); +} + +template +void setBlockBytesZeroOnGPU(BlockType* block_device_ptr) { + // TODO(alexmillane): This is a cuda call in a public header... Is this + // causing issues? + cudaMemset(block_device_ptr, 0, sizeof(BlockType)); +} + +} // namespace nvblox diff --git a/nvblox/include/nvblox/core/impl/bounding_boxes_impl.h b/nvblox/include/nvblox/core/impl/bounding_boxes_impl.h new file mode 100644 index 00000000..de5e7998 --- /dev/null +++ b/nvblox/include/nvblox/core/impl/bounding_boxes_impl.h @@ -0,0 +1,78 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include "nvblox/core/accessors.h" + +namespace nvblox { + +inline std::vector getBlockIndicesTouchedByBoundingBox( + const float block_size, const AxisAlignedBoundingBox& aabb_L) { + // Get bounds (in voxels) + const Index3D aabb_min_vox_L = + getBlockIndexFromPositionInLayer(block_size, aabb_L.min()); + const Index3D aabb_max_vox_L = + getBlockIndexFromPositionInLayer(block_size, aabb_L.max()); + const Index3D num_blocks = aabb_max_vox_L - aabb_min_vox_L; + // Fill up the vector + std::vector block_indices; + block_indices.reserve(num_blocks.x() * num_blocks.y() * num_blocks.z()); + Index3D current_idx = aabb_min_vox_L; + for (int x_ind = 0; x_ind <= num_blocks.x(); x_ind++) { + for (int y_ind = 0; y_ind <= num_blocks.y(); y_ind++) { + for (int z_ind = 0; z_ind <= num_blocks.z(); z_ind++) { + block_indices.push_back(current_idx); + current_idx.z()++; + } + current_idx.z() -= (num_blocks.z() + 1); // reset z + current_idx.y()++; + } + current_idx.y() -= (num_blocks.y() + 1); // reset y + current_idx.x()++; + } + return block_indices; +} + +inline AxisAlignedBoundingBox getAABBOfBlock(const float block_size, + const Index3D& block_index) { + return AxisAlignedBoundingBox( + block_index.cast() * block_size, + (block_index.cast() + Vector3f(1.0f, 1.0f, 1.0f)) * block_size); +} + +template +AxisAlignedBoundingBox getAABBOfAllocatedBlocks( + const BlockLayer& layer) { + AxisAlignedBoundingBox aabb; + for (const Index3D& idx : layer.getAllBlockIndices()) { + aabb = aabb.merged(getAABBOfBlock(layer.block_size(), idx)); + } + return aabb; +} + +template +std::vector getAllocatedBlocksWithinAABB( + const BlockLayer& layer, const AxisAlignedBoundingBox& aabb) { + std::vector allocated_blocks; + for (const Index3D& idx : layer.getAllBlockIndices()) { + if (aabb.intersects(getAABBOfBlock(layer.block_size(), idx))) { 
+ allocated_blocks.push_back(idx); + } + } + return allocated_blocks; +} + +} // namespace nvblox diff --git a/nvblox/include/nvblox/core/impl/camera_impl.h b/nvblox/include/nvblox/core/impl/camera_impl.h new file mode 100644 index 00000000..496d226e --- /dev/null +++ b/nvblox/include/nvblox/core/impl/camera_impl.h @@ -0,0 +1,72 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include + +namespace nvblox { + +Camera::Camera(float fu, float fv, float cu, float cv, int width, int height) + : fu_(fu), fv_(fv), cu_(cu), cv_(cv), width_(width), height_(height) {} +Camera::Camera(float fu, float fv, int width, int height) + : Camera(fu, fv, width / 2.0, height / 2.0, width, height) {} + +bool Camera::project(const Eigen::Vector3f& p_C, Eigen::Vector2f* u_C) const { + // Point is behind the camera. + if (p_C.z() <= 0.0f) { + return false; + } + const float inv_z = 1.0f / p_C.z(); + *u_C = inv_z * Eigen::Vector2f(p_C.x(), p_C.y()); + + u_C->x() = u_C->x() * fu_ + cu_; + u_C->y() = u_C->y() * fv_ + cv_; + + if (u_C->x() > width_ || u_C->y() > height_ || u_C->x() < 0 || u_C->y() < 0) { + return false; + } + return true; +} + +Vector3f Camera::rayFromImagePlaneCoordinates(const Vector2f& u_C) const { + // NOTE(alexmillane): We allow u_C values up to the outer edges of pixels, + // hence the GE and LE checks. + DCHECK_GE(u_C[0], 0.0f); + DCHECK_LE(u_C[0], width_); + DCHECK_GE(u_C[1], 0.0f); + DCHECK_LE(u_C[1], height_); + return Vector3f((u_C[0] - cu_) / fu_, // NOLINT + (u_C[1] - cv_) / fv_, // NOLINT + 1.0f); +} + +Vector3f Camera::rayFromPixelIndices(const Index2D& u_C) const { + // NOTE(alexmillane): The +0.5 here takes us from image plane indices, which + // are equal to the coordinates of the lower pixel corner, to the pixel + // center. + return rayFromImagePlaneCoordinates(u_C.cast() + Vector2f(0.5, 0.5)); +} + +Camera Camera::fromIntrinsicsMatrix(const Eigen::Matrix3f& mat, int width, + int height) { + const float fu = mat(0, 0); + const float fv = mat(1, 1); + const float cu = mat(0, 2); + const float cv = mat(1, 2); + return Camera(fu, fv, cu, cv, width, height); +} + +} // namespace nvblox diff --git a/nvblox/include/nvblox/core/impl/image_impl.h b/nvblox/include/nvblox/core/impl/image_impl.h new file mode 100644 index 00000000..a3acb44d --- /dev/null +++ b/nvblox/include/nvblox/core/impl/image_impl.h @@ -0,0 +1,112 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#pragma once + +#include + +#include "nvblox/core/cuda/image_cuda.h" +#include "nvblox/core/interpolation_2d.h" +#include "nvblox/io/csv.h" + +namespace nvblox { + +template +Image::Image(const Image& other) + : Image(other, other.memory_type_){}; + +template +Image::Image(const Image& other, + MemoryType memory_type) + : rows_(other.rows()), + cols_(other.cols()), + memory_type_(memory_type), + data_(make_unified(static_cast(rows_ * cols_), + memory_type)) { + LOG(WARNING) << "Deep copy of Image."; + cuda::copy(rows_, cols_, other.data_.get(), data_.get()); +} + +template +Image::Image(Image&& other) + : rows_(other.rows()), + cols_(other.cols()), + memory_type_(other.memory_type()), + data_(std::move(other.data_)) {} + +template +Image& Image::operator=(Image&& other) { + rows_ = other.rows_; + cols_ = other.cols_; + memory_type_ = other.memory_type_; + data_ = std::move(other.data_); + return *this; +} + +template +Image& Image::operator=( + const Image& other) { + LOG(WARNING) << "Deep copy of Image."; + rows_ = other.rows_; + cols_ = other.cols_; + memory_type_ = other.memory_type_; + data_ = make_unified(static_cast(rows_ * cols_), + memory_type_); + cuda::copy(rows_, cols_, other.data_.get(), data_.get()); + return *this; +} + +template +void Image::toGPU() const { + CHECK(memory_type_ == MemoryType::kUnified) + << "Called kUnified function on kDevice image."; + cuda::toGPU(data_.get(), rows_, cols_); +} + +template +ImageType fromBufferTemplate(int rows, int cols, + const typename ImageType::ElementType* buffer, + MemoryType memory_type) { + ImageType image(rows, cols, memory_type); + cuda::copy(rows, cols, buffer, + image.dataPtr()); + return image; +} + +template +Image Image::fromBuffer(int rows, int cols, + const ElementType* buffer, + MemoryType memory_type) { + return fromBufferTemplate>(rows, cols, buffer, + memory_type); +} + +template +void Image::populateFromBuffer(int rows, int cols, + const ElementType* buffer, + MemoryType memory_type) { + if (!data_ || numel() < rows * cols || memory_type != memory_type_) { + // We need to reallocate. + data_ = make_unified(static_cast(rows * cols), + memory_type); + } + rows_ = rows; + cols_ = cols; + memory_type_ = memory_type; + cudaMemcpy(data_.get(), buffer, rows * cols * sizeof(ElementType), + cudaMemcpyDefault); +} + +} // namespace nvblox diff --git a/nvblox/include/nvblox/core/impl/indexing_impl.h b/nvblox/include/nvblox/core/impl/indexing_impl.h new file mode 100644 index 00000000..3c50414b --- /dev/null +++ b/nvblox/include/nvblox/core/impl/indexing_impl.h @@ -0,0 +1,94 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include + +#include "nvblox/core/blox.h" + +namespace nvblox { + +// TODO: this is badly done now for non-Eigen types. +/// Input position: metric (world) units with respect to the layer origin. +/// Output index: voxel index relative to the block origin. 
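+// Worked example (hypothetical numbers): with 8 voxels per block side and
+// block_size = 0.4 m, voxel_size = 0.05 m. A point at x = 0.93 m lies in
+// global voxel floor(0.93 / 0.05) = 18, and 18 = 2 * 8 + 2, so it falls in
+// voxel 2 of block 2 along x.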
+Index3D getVoxelIndexFromPositionInLayer(const float block_size,
+                                         const Vector3f& position) {
+  constexpr int kVoxelsPerSide = VoxelBlock<bool>::kVoxelsPerSide;
+  constexpr float kVoxelsPerSideInv = 1.0f / kVoxelsPerSide;
+  const float voxel_size = block_size * kVoxelsPerSideInv;
+
+  Index3D global_voxel_index =
+      (position / voxel_size).array().floor().cast<int>();
+  Index3D voxel_index = global_voxel_index.unaryExpr([&](int x) {
+    // Wrap into [0, kVoxelsPerSide); plain % is negative for negative x.
+    const int m = x % kVoxelsPerSide;
+    return (m < 0) ? m + kVoxelsPerSide : m;
+  });
+
+  // Double-check that we get reasonable indices out.
+  DCHECK((voxel_index.array() >= 0).all() &&
+         (voxel_index.array() < kVoxelsPerSide).all());
+
+  return voxel_index;
+}
+
+Index3D getBlockIndexFromPositionInLayer(const float block_size,
+                                         const Vector3f& position) {
+  Eigen::Vector3i index = (position / block_size).array().floor().cast<int>();
+  return Index3D(index.x(), index.y(), index.z());
+}
+
+void getBlockAndVoxelIndexFromPositionInLayer(const float block_size,
+                                              const Vector3f& position,
+                                              Index3D* block_idx,
+                                              Index3D* voxel_idx) {
+  constexpr int kVoxelsPerSideMinusOne = VoxelBlock<bool>::kVoxelsPerSide - 1;
+  constexpr float kVoxelsPerSideInv = 1.0f / VoxelBlock<bool>::kVoxelsPerSide;
+  const float voxel_size_inv = 1.0f / (block_size * kVoxelsPerSideInv);
+  *block_idx = (position / block_size).array().floor().cast<int>();
+  *voxel_idx =
+      ((position - block_size * block_idx->cast<float>()) * voxel_size_inv)
+          .array()
+          .cast<int>()
+          .min(kVoxelsPerSideMinusOne);
+}
+
+Vector3f getPositionFromBlockIndexAndVoxelIndex(const float block_size,
+                                                const Index3D& block_index,
+                                                const Index3D& voxel_index) {
+  constexpr float kVoxelsPerSideInv = 1.0f / VoxelBlock<bool>::kVoxelsPerSide;
+  const float voxel_size = block_size * kVoxelsPerSideInv;
+
+  return Vector3f(block_size * block_index.cast<float>() +
+                  voxel_size * voxel_index.cast<float>());
+}
+
+Vector3f getPositionFromBlockIndex(const float block_size,
+                                   const Index3D& block_index) {
+  // This is pretty trivial, huh.
+  return Vector3f(block_size * block_index.cast<float>());
+}
+
+Vector3f getCenterPostionFromBlockIndexAndVoxelIndex(
+    const float block_size, const Index3D& block_index,
+    const Index3D& voxel_index) {
+  constexpr float kHalfVoxelsPerSideInv =
+      0.5f / VoxelBlock<bool>::kVoxelsPerSide;
+  const float half_voxel_size = block_size * kHalfVoxelsPerSideInv;
+
+  return getPositionFromBlockIndexAndVoxelIndex(block_size, block_index,
+                                                voxel_index) +
+         Vector3f(half_voxel_size, half_voxel_size, half_voxel_size);
+}
+
+}  // namespace nvblox
diff --git a/nvblox/include/nvblox/core/impl/interpolation_2d_impl.h b/nvblox/include/nvblox/core/impl/interpolation_2d_impl.h
new file mode 100644
index 00000000..f426dfba
--- /dev/null
+++ b/nvblox/include/nvblox/core/impl/interpolation_2d_impl.h
@@ -0,0 +1,131 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+namespace nvblox {
+namespace interpolation {
+namespace internal {
+
+__host__ __device__ inline float interpolatePixels(const Vector2f& xy_px,
+                                                   const float& f00,
+                                                   const float& f01,
+                                                   const float& f10,
+                                                   const float& f11) {
+  // Interpolation on a grid with 1 pixel spacing.
+  // https://en.wikipedia.org/wiki/Bilinear_interpolation#On_the_unit_square
+  const Eigen::Matrix2f value_matrix =
+      (Eigen::Matrix2f() << f00, f01, f10, f11).finished();
+  const Eigen::Vector2f x_vec(1.0f - xy_px.x(), xy_px.x());
+  const Eigen::Vector2f y_vec(1.0f - xy_px.y(), xy_px.y());
+  return x_vec.transpose() * value_matrix * y_vec;
+}
+
+__host__ __device__ inline Color interpolatePixels(const Vector2f& xy_px,
+                                                   const Color& c00,
+                                                   const Color& c01,
+                                                   const Color& c10,
+                                                   const Color& c11) {
+  // We define interpolating two colors as independently interpolating their
+  // channels.
+  return Color(
+      static_cast<uint8_t>(
+          std::round(interpolatePixels(xy_px, c00.r, c01.r, c10.r, c11.r))),
+      static_cast<uint8_t>(
+          std::round(interpolatePixels(xy_px, c00.g, c01.g, c10.g, c11.g))),
+      static_cast<uint8_t>(
+          std::round(interpolatePixels(xy_px, c00.b, c01.b, c10.b, c11.b))));
+}
+
+}  // namespace internal
+
+template <typename ElementType>
+bool interpolate2D(const Image<ElementType>& frame, const Vector2f& u_px,
+                   ElementType* value_interpolated_ptr,
+                   const InterpolationType type) {
+  if (type == InterpolationType::kNearestNeighbor) {
+    return interpolate2DClosest(frame, u_px, value_interpolated_ptr);
+  }
+  if (type == InterpolationType::kLinear) {
+    return interpolate2DLinear(frame, u_px, value_interpolated_ptr);
+  } else {
+    CHECK(false) << "Requested interpolation method is not implemented.";
+  }
+  return false;
+}
+
+template <typename ElementType>
+bool interpolate2DClosest(const Image<ElementType>& frame, const Vector2f& u_px,
+                          ElementType* value_interpolated_ptr) {
+  return interpolate2DClosest(frame.dataConstPtr(), u_px, frame.rows(),
+                              frame.cols(), value_interpolated_ptr);
+}
+
+template <typename ElementType>
+bool interpolate2DClosest(const ElementType* frame, const Vector2f& u_px,
+                          const int rows, const int cols,
+                          ElementType* value_interpolated_ptr) {
+  // Closest pixel
+  const Index2D u_M_rounded = u_px.array().round().cast<int>();
+  // Check bounds:
+  if (u_M_rounded.x() < 0 || u_M_rounded.y() < 0 || u_M_rounded.x() >= cols ||
+      u_M_rounded.y() >= rows) {
+    return false;
+  }
+  // "Interpolate"
+  *value_interpolated_ptr =
+      image::access(u_M_rounded.y(), u_M_rounded.x(), cols, frame);
+  return true;
+}
+
+template <typename ElementType>
+bool interpolate2DLinear(const Image<ElementType>& frame, const Vector2f& u_px,
+                         ElementType* value_interpolated_ptr) {
+  return interpolate2DLinear(frame.dataConstPtr(), u_px, frame.rows(),
+                             frame.cols(), value_interpolated_ptr);
+}
+
+template <typename ElementType>
+bool interpolate2DLinear(const ElementType* frame, const Vector2f& u_px,
+                         const int rows, const int cols,
+                         ElementType* value_interpolated_ptr) {
+  // Subtraction of Vector2f(0.5, 0.5) takes our coordinates from
+  // corner-referenced to center-referenced.
+  const Vector2f u_center_referenced_px = u_px - Vector2f(0.5, 0.5);
+  // Get the pixel index of the pixel on the low side (which is also the image
+  // plane location of the pixel center).
+  const Index2D u_low_side_px = u_center_referenced_px.cast<int>();
+  // If we're going to access out of bounds, fail.
+  if ((u_low_side_px.array() < 0).any() ||
+      ((u_low_side_px.x() + 1) > (cols - 1)) ||
+      ((u_low_side_px.y() + 1) > (rows - 1))) {
+    return false;
+  }
+  // Offset of the requested point to the low side center.
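+  // Worked example (hypothetical numbers): u_px = (2.3, 4.7) gives
+  // u_center_referenced_px = (1.8, 4.2), so u_low_side_px = (1, 4) and
+  // u_offset = (0.8, 0.2); the result below then mixes the four neighbours
+  // with bilinear weights, e.g. (1 - 0.8) * (1 - 0.2) for the low corner.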
+  const Eigen::Vector2f u_offset =
+      (u_center_referenced_px - u_low_side_px.cast<float>());
+  // clang-format off
+  *value_interpolated_ptr = internal::interpolatePixels(
+      u_offset,
+      image::access(u_low_side_px.y(),     u_low_side_px.x(),     cols, frame),
+      image::access(u_low_side_px.y() + 1, u_low_side_px.x(),     cols, frame),
+      image::access(u_low_side_px.y(),     u_low_side_px.x() + 1, cols, frame),
+      image::access(u_low_side_px.y() + 1, u_low_side_px.x() + 1, cols, frame));
+  // clang-format on
+  return true;
+}
+
+}  // namespace interpolation
+}  // namespace nvblox
diff --git a/nvblox/include/nvblox/core/impl/interpolation_3d_impl.h b/nvblox/include/nvblox/core/impl/interpolation_3d_impl.h
new file mode 100644
index 00000000..71ef90af
--- /dev/null
+++ b/nvblox/include/nvblox/core/impl/interpolation_3d_impl.h
@@ -0,0 +1,190 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+namespace nvblox {
+namespace interpolation {
+
+template <typename VoxelType>
+void interpolateOnCPU(const std::vector<Vector3f>& points_L,
+                      const VoxelBlockLayer<VoxelType>& layer,
+                      std::vector<float>* distances_ptr,
+                      std::vector<bool>* success_flags_ptr) {
+  CHECK_NOTNULL(distances_ptr);
+  CHECK_NOTNULL(success_flags_ptr);
+  CHECK(layer.memory_type() == MemoryType::kUnified)
+      << "For CPU-based interpolation, the layer must be CPU accessible (ie "
+         "MemoryType::kUnified).";
+  distances_ptr->reserve(points_L.size());
+  success_flags_ptr->reserve(points_L.size());
+  for (const Vector3f& p_L : points_L) {
+    float distance;
+    success_flags_ptr->push_back(interpolateOnCPU(p_L, layer, &distance));
+    distances_ptr->push_back(distance);
+  }
+}
+
+namespace internal {
+
+// Interpolate a layer's voxel's members using a member-access function
+template <typename VoxelType>
+using GetMemberFunctionType = std::function<float(const VoxelType& voxel)>;
+template <typename VoxelType>
+using VoxelValidFunctionType = std::function<bool(const VoxelType& voxel)>;
+
+template <typename VoxelType>
+bool interpolateMemberOnCPU(const Vector3f& p_L,
+                            const VoxelBlockLayer<VoxelType>& layer,
+                            GetMemberFunctionType<VoxelType> get_member,
+                            VoxelValidFunctionType<VoxelType> is_valid,
+                            float* result_ptr);
+
+template <typename VoxelType>
+bool getSurroundingVoxels3D(const Eigen::Vector3f p_L,
+                            const VoxelBlockLayer<VoxelType>& layer,
+                            VoxelValidFunctionType<VoxelType> is_valid,
+                            std::array<VoxelType, 8>* voxels_ptr,
+                            Vector3f* p_offset_in_voxels_L_ptr = nullptr) {
+  CHECK_NOTNULL(voxels_ptr);
+  // Get the low-side voxel: ie the voxel whose midpoint lies on the low side
+  // of the query point.
+  // NOTE(alexmillane): We implement this by finding the block and voxel
+  // coordinates of a point half a voxel-width lower than the query point.
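+  // Worked example in 1-D (hypothetical numbers): with voxel_size = 0.1 m,
+  // voxel centers lie at 0.05 m, 0.15 m, ... For a query at x = 0.12 m, the
+  // shifted point 0.12 - 0.05 = 0.07 m falls in voxel 0, whose center
+  // (0.05 m) is indeed the nearest voxel center on the low side of the query.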
+  const float voxel_size = layer.voxel_size();
+  const float half_voxel_size = voxel_size * 0.5f;
+  Index3D low_block_idx;
+  Index3D low_voxel_idx;
+  getBlockAndVoxelIndexFromPositionInLayer(layer.block_size(),
+                                           p_L.array() - half_voxel_size,
+                                           &low_block_idx, &low_voxel_idx);
+
+  // Get offset between bottom-left-corner voxel and the point
+  if (p_offset_in_voxels_L_ptr != nullptr) {
+    const Vector3f p_corner_L =
+        getPositionFromBlockIndexAndVoxelIndex(layer.block_size(),
+                                               low_block_idx, low_voxel_idx) +
+        half_voxel_size * Vector3f::Ones();
+    *p_offset_in_voxels_L_ptr = (p_L - p_corner_L) / voxel_size;
+  }
+
+  // Get the 8 voxels surrounding this point
+  int linear_idx = 0;
+  for (int x = 0; x < 2; x++) {
+    for (int y = 0; y < 2; y++) {
+      for (int z = 0; z < 2; z++) {
+        Index3D voxel_idx = low_voxel_idx + Index3D(x, y, z);
+        Index3D block_idx = low_block_idx;
+
+        // Move into a neighbouring block(s) if required
+        if ((voxel_idx.array() ==
+             VoxelBlock<VoxelType>::kVoxelsPerSide).any()) {
+          // Increment block index in dimensions which have hit the limits
+          const Eigen::Array<bool, 3, 1> limits_hit_flags =
+              (voxel_idx.array() == VoxelBlock<VoxelType>::kVoxelsPerSide);
+          block_idx += limits_hit_flags.matrix().cast<int>();
+          // Reset voxel index to zero in dimensions in which the block idx was
+          // incremented
+          voxel_idx = limits_hit_flags.select(0, voxel_idx);
+        }
+
+        // Get the voxel
+        const typename VoxelBlock<VoxelType>::ConstPtr block_ptr =
+            layer.getBlockAtIndex(block_idx);
+        if (block_ptr == nullptr) {
+          return false;
+        }
+        const VoxelType& voxel =
+            block_ptr->voxels[voxel_idx.x()][voxel_idx.y()][voxel_idx.z()];
+
+        // Check voxel valid
+        if (!is_valid(voxel)) {
+          return false;
+        }
+        // Write
+        (*voxels_ptr)[linear_idx] = voxel;
+        ++linear_idx;
+      }
+    }
+  }
+  return true;
+}
+
+Eigen::Matrix<float, 1, 8> getQVector3D(const Vector3f& p_offset_in_voxels_L) {
+  // Units are in voxels
+  CHECK((p_offset_in_voxels_L.array() >= 0.0f).all());
+  CHECK((p_offset_in_voxels_L.array() <= 1.0f).all());
+
+  constexpr float kEps = 1e-4;
+  constexpr float kOnePlusEps = 1.0f + kEps;
+  CHECK((p_offset_in_voxels_L.array() <= kOnePlusEps).all());
+  // FROM PAPER (http://spie.org/samples/PM159.pdf)
+  // clang-format off
+  Eigen::Matrix<float, 1, 8> q_vector;
+  q_vector <<
+      1,
+      p_offset_in_voxels_L[0],
+      p_offset_in_voxels_L[1],
+      p_offset_in_voxels_L[2],
+      p_offset_in_voxels_L[0] * p_offset_in_voxels_L[1],
+      p_offset_in_voxels_L[1] * p_offset_in_voxels_L[2],
+      p_offset_in_voxels_L[2] * p_offset_in_voxels_L[0],
+      p_offset_in_voxels_L[0] * p_offset_in_voxels_L[1] * p_offset_in_voxels_L[2];
+  // clang-format on
+  return q_vector;
+}
+
+template <typename VoxelType>
+bool interpolateMemberOnCPU(const Vector3f& p_L,
+                            const VoxelBlockLayer<VoxelType>& layer,
+                            GetMemberFunctionType<VoxelType> get_member,
+                            VoxelValidFunctionType<VoxelType> is_valid,
+                            float* result_ptr) {
+  std::array<VoxelType, 8> voxels;
+
+  Eigen::Vector3f p_offset_L;
+  if (!internal::getSurroundingVoxels3D(p_L, layer, is_valid, &voxels,
+                                        &p_offset_L)) {
+    return false;
+  }
+
+  // Express interpolation as matrix multiplication from paper:
+  // (http://spie.org/samples/PM159.pdf)
+  Eigen::Matrix<float, 8, 1> member_vector;
+  for (int i = 0; i < 8; i++) {
+    member_vector[i] = get_member(voxels[i]);
+  }
+  const Eigen::Matrix<float, 1, 8> q_vector =
+      internal::getQVector3D(p_offset_L);
+  // clang-format off
+  static const Eigen::Matrix<float, 8, 8> interpolation_table =
+      (Eigen::Matrix<float, 8, 8>() <<
+        1,  0,  0,  0,  0,  0,  0,  0,
+       -1,  0,  0,  0,  1,  0,  0,  0,
+       -1,  0,  1,  0,  0,  0,  0,  0,
+       -1,  1,  0,  0,  0,  0,  0,  0,
+        1,  0, -1,  0, -1,  0,  1,  0,
+        1, -1, -1,  1,  0,  0,  0,  0,
+        1, -1,  0,  0, -1,  1,  0,  0,
+       -1,  1,  1, -1,  1, -1, -1,  1
+      )
.finished(); + // clang-format on + *result_ptr = q_vector * (interpolation_table * member_vector); + return true; +} + +} // namespace internal +} // namespace interpolation +} // namespace nvblox diff --git a/nvblox/include/nvblox/core/impl/layer_cake_impl.h b/nvblox/include/nvblox/core/impl/layer_cake_impl.h new file mode 100644 index 00000000..34e57801 --- /dev/null +++ b/nvblox/include/nvblox/core/impl/layer_cake_impl.h @@ -0,0 +1,108 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include "nvblox/core/variadic_template_tools.h" + +namespace nvblox { + +template +LayerType* LayerCake::add(MemoryType memory_type) { + if (layers_.count(typeid(LayerType)) == 0) { + // Allocate + auto layer_ptr = std::make_unique( + sizeArgumentFromVoxelSize(voxel_size_), memory_type); + LayerType* return_ptr = layer_ptr.get(); + // Store (as BaseLayer ptr) + layers_.emplace(std::type_index(typeid(LayerType)), std::move(layer_ptr)); + LOG(INFO) << "Adding Layer with type: " << typeid(LayerType).name() + << " , and memory_type: " << toString(memory_type) + << " to LayerCake."; + return return_ptr; + } else { + LOG(WARNING) << "Request to add a LayerType that's already in the cake. " + "Currently we only support single layers of each " + "LayerType. So we did nothing."; + return nullptr; + } +} + +template +const LayerType* LayerCake::getConstPtr() const { + auto it = layers_.find(std::type_index(typeid(LayerType))); + if (it != layers_.end()) { + const BaseLayer* base_ptr = it->second.get(); + const LayerType* ptr = dynamic_cast(base_ptr); + CHECK_NOTNULL(ptr); + return ptr; + } else { + LOG(WARNING) << "Request for a LayerType: " << typeid(LayerType).name() + << " which is not in the cake."; + return nullptr; + } +} + +template +LayerType* LayerCake::getPtr() { + auto it = layers_.find(std::type_index(typeid(LayerType))); + if (it != layers_.end()) { + BaseLayer* base_ptr = it->second.get(); + LayerType* ptr = dynamic_cast(base_ptr); + CHECK_NOTNULL(ptr); + return ptr; + } else { + LOG(WARNING) << "Request for a LayerType: " << typeid(LayerType).name() + << " which is not in the cake."; + return nullptr; + } +} + +template +const LayerType& LayerCake::get() const { + const LayerType* ptr = getConstPtr(); + CHECK_NOTNULL(ptr); + return *ptr; +} + +template +bool LayerCake::exists() const { + const auto it = layers_.find(std::type_index(typeid(LayerType))); + return (it != layers_.end()); +} + +template +LayerCake LayerCake::create(float voxel_size, MemoryType memory_type) { + static_assert(unique_types::value, + "At the moment we only support LayerCakes containing unique " + "LayerTypes."); + LayerCake cake(voxel_size); + BaseLayer* ignored[] = {cake.add(memory_type)...}; + return cake; +} + +template +LayerCake LayerCake::create(float voxel_size, + MemoryTypes... 
memory_types) { + static_assert(unique_types::value, + "At the moment we only support LayerCakes containing unique " + "LayerTypes."); + LayerCake cake(voxel_size); + BaseLayer* ignored[] = {cake.add(memory_types)...}; + return cake; +} + + +} // namespace nvblox diff --git a/nvblox/include/nvblox/core/impl/layer_impl.h b/nvblox/include/nvblox/core/impl/layer_impl.h new file mode 100644 index 00000000..8be099ef --- /dev/null +++ b/nvblox/include/nvblox/core/impl/layer_impl.h @@ -0,0 +1,185 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include + +#include "nvblox/core/accessors.h" +#include "nvblox/core/indexing.h" +#include "nvblox/core/types.h" + +namespace nvblox { + +// Block accessors by index. +template +typename BlockType::Ptr BlockLayer::getBlockAtIndex( + const Index3D& index) { + // Look up the block in the hash? + // And return it. + auto it = blocks_.find(index); + if (it != blocks_.end()) { + return it->second; + } else { + return typename BlockType::Ptr(); + } +} + +template +typename BlockType::ConstPtr BlockLayer::getBlockAtIndex( + const Index3D& index) const { + const auto it = blocks_.find(index); + if (it != blocks_.end()) { + return (it->second); + } else { + return typename BlockType::ConstPtr(); + } +} + +template +typename BlockType::Ptr BlockLayer::allocateBlockAtIndex( + const Index3D& index) { + auto it = blocks_.find(index); + if (it != blocks_.end()) { + return it->second; + } else { + // Invalidate the GPU hash + gpu_layer_view_up_to_date_ = false; + // Blocks define their own method for allocation. + auto insert_status = + blocks_.emplace(index, BlockType::allocate(memory_type_)); + return insert_status.first->second; + } +} + +// Block accessors by position. 
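+// A usage sketch (hypothetical; voxel size and positions are made up):
+//
+//   TsdfLayer layer(0.05f /* voxel_size [m] */, MemoryType::kUnified);
+//   auto block = layer.allocateBlockAtPosition(Vector3f(1.0f, 2.0f, 3.0f));
+//   auto same = layer.getBlockAtPosition(Vector3f(1.0f, 2.0f, 3.0f));
+//   CHECK(block == same);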
+template +typename BlockType::Ptr BlockLayer::getBlockAtPosition( + const Eigen::Vector3f& position) { + return getBlockAtIndex( + getBlockIndexFromPositionInLayer(block_size_, position)); +} + +template +typename BlockType::ConstPtr BlockLayer::getBlockAtPosition( + const Eigen::Vector3f& position) const { + return getBlockAtIndex( + getBlockIndexFromPositionInLayer(block_size_, position)); +} + +template +typename BlockType::Ptr BlockLayer::allocateBlockAtPosition( + const Eigen::Vector3f& position) { + return allocateBlockAtIndex( + getBlockIndexFromPositionInLayer(block_size_, position)); +} + +template +std::vector BlockLayer::getAllBlockIndices() const { + std::vector indices; + indices.reserve(blocks_.size()); + + for (const auto& kv : blocks_) { + indices.push_back(kv.first); + } + + return indices; +} + +template +bool BlockLayer::isBlockAllocated(const Index3D& index) const { + const auto it = blocks_.find(index); + return (it != blocks_.end()); +} + +template +typename BlockLayer::GPULayerViewType +BlockLayer::getGpuLayerView() const { + if (!gpu_layer_view_) { + gpu_layer_view_ = std::make_unique(); + } + if (!gpu_layer_view_up_to_date_) { + (*gpu_layer_view_).reset(const_cast*>(this)); + gpu_layer_view_up_to_date_ = true; + } + return *gpu_layer_view_; +} + +// VoxelBlockLayer + +template +void VoxelBlockLayer::getVoxels( + const std::vector& positions_L, + std::vector* voxels_ptr, + std::vector* success_flags_ptr) const { + CHECK_NOTNULL(voxels_ptr); + CHECK_NOTNULL(success_flags_ptr); + + cudaStream_t transfer_stream; + checkCudaErrors(cudaStreamCreate(&transfer_stream)); + + voxels_ptr->resize(positions_L.size()); + success_flags_ptr->resize(positions_L.size()); + + for (int i = 0; i < positions_L.size(); i++) { + const Vector3f& p_L = positions_L[i]; + // Get the block address + Index3D block_idx; + Index3D voxel_idx; + getBlockAndVoxelIndexFromPositionInLayer(this->block_size_, p_L, &block_idx, + &voxel_idx); + const typename VoxelBlock::ConstPtr block_ptr = + this->getBlockAtIndex(block_idx); + if (!block_ptr) { + (*success_flags_ptr)[i] = false; + continue; + } + (*success_flags_ptr)[i] = true; + // Get the voxel address + const auto block_raw_ptr = block_ptr.get(); + const VoxelType* voxel_ptr = + &block_raw_ptr->voxels[voxel_idx.x()][voxel_idx.y()][voxel_idx.z()]; + // Copy the Voxel to the CPU + cudaMemcpyAsync(&(*voxels_ptr)[i], voxel_ptr, sizeof(VoxelType), + cudaMemcpyDefault, transfer_stream); + } + cudaStreamSynchronize(transfer_stream); + checkCudaErrors(cudaStreamDestroy(transfer_stream)); +} + +namespace internal { + +template +inline float sizeArgumentFromVoxelSize(float voxel_size); + +template <> +inline float sizeArgumentFromVoxelSize(float voxel_size) { + return voxel_size; +} + +template <> +inline float sizeArgumentFromVoxelSize(float voxel_size) { + return voxel_size * VoxelBlock::kVoxelsPerSide; +} + +} // namespace internal + +template +constexpr float sizeArgumentFromVoxelSize(float voxel_size) { + return internal::sizeArgumentFromVoxelSize< + typename traits::is_voxel_layer::type>(voxel_size); +} + +} // namespace nvblox diff --git a/nvblox/include/nvblox/core/impl/unified_ptr_impl.h b/nvblox/include/nvblox/core/impl/unified_ptr_impl.h new file mode 100644 index 00000000..ce842038 --- /dev/null +++ b/nvblox/include/nvblox/core/impl/unified_ptr_impl.h @@ -0,0 +1,353 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include +#include + +#include "nvblox/core/cuda/error_check.cuh" + +namespace nvblox { + +// Single object +template +typename _Unified_if::_Single_object make_unified(Args&&... args) { + return make_unified(unified_ptr::kDefaultMemoryType, + std::forward(args)...); +} + +template +typename _Unified_if::_Single_object make_unified(MemoryType memory_type, + Args&&... args) { + T* cuda_ptr = nullptr; + if (memory_type == MemoryType::kDevice) { + // No constructor (or destructor, hence the check) + CHECK(std::is_trivially_destructible::value); + checkCudaErrors(cudaMalloc(&cuda_ptr, sizeof(T))); + return unified_ptr(cuda_ptr, memory_type); + } else if (memory_type == MemoryType::kUnified) { + // Constructor called + checkCudaErrors( + cudaMallocManaged(&cuda_ptr, sizeof(T), cudaMemAttachGlobal)); + return unified_ptr(new (cuda_ptr) T(args...), memory_type); + } else { + // Constructor called + checkCudaErrors(cudaMallocHost(&cuda_ptr, sizeof(T))); + return unified_ptr(new (cuda_ptr) T(args...), memory_type); + } +} + +// Array +template +typename _Unified_if::_Unknown_bound make_unified(std::size_t size) { + return make_unified(size, unified_ptr::kDefaultMemoryType); +} + +template +typename _Unified_if::_Unknown_bound make_unified(std::size_t size, + MemoryType memory_type) { + typedef typename std::remove_extent::type TNonArray; + TNonArray* cuda_ptr = nullptr; + if (memory_type == MemoryType::kDevice) { + // No constructor + checkCudaErrors(cudaMalloc(&cuda_ptr, sizeof(TNonArray) * size)); + return unified_ptr(cuda_ptr, memory_type, size); + } else if (memory_type == MemoryType::kUnified) { + // Default constructor + checkCudaErrors(cudaMallocManaged(&cuda_ptr, sizeof(TNonArray) * size, + cudaMemAttachGlobal)); + return unified_ptr(new (cuda_ptr) TNonArray[size], memory_type, size); + } else { + // Default constructor + checkCudaErrors(cudaMallocHost(&cuda_ptr, sizeof(TNonArray) * size)); + return unified_ptr(new (cuda_ptr) TNonArray[size], memory_type, size); + } +} + +// Default constructor. +template +unified_ptr::unified_ptr() + : memory_type_(kDefaultMemoryType), ptr_(nullptr), ref_counter_(nullptr) {} + +// Useful constructor. +template +unified_ptr::unified_ptr(T_noextent* ptr, MemoryType memory_type, + size_t size) + : memory_type_(memory_type), + size_(size), + ptr_(ptr), + ref_counter_(new std::atomic(1)) {} + +// Copy constructor. +template +unified_ptr::unified_ptr(const unified_ptr& other) + : memory_type_(other.memory_type_), + size_(other.size_), + ptr_(other.ptr_), + ref_counter_(other.ref_counter_) { + if (ptr_ != nullptr) { + (*ref_counter_)++; + } +} + +// Move constructor. 
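+// (Note that, unlike std::unique_ptr, this handle is reference counted: the
+// copy constructor above and the move constructor below both share ownership
+// by incrementing the count.) A usage sketch (hypothetical):
+//
+//   auto a = make_unified<float>(MemoryType::kUnified, 1.0f);
+//   auto b = a;                           // shares the same managed float
+//   auto c = a.clone(MemoryType::kHost);  // deep copy, separate allocation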
+template +unified_ptr::unified_ptr(unified_ptr&& other) + : memory_type_(other.memory_type_), + size_(other.size_), + ptr_(other.ptr_), + ref_counter_(other.ref_counter_) { + if (ptr_ != nullptr) { + (*ref_counter_)++; + } +} + +template +struct Deleter { + static void destroy(T* ptr, MemoryType memory_type) { + if (memory_type == MemoryType::kUnified) { + ptr->~T(); + checkCudaErrors( + cudaFree(const_cast(reinterpret_cast(ptr)))); + } else if (memory_type == MemoryType::kDevice) { + checkCudaErrors( + cudaFree(const_cast(reinterpret_cast(ptr)))); + } else { + ptr->~T(); + checkCudaErrors( + cudaFreeHost(const_cast(reinterpret_cast(ptr)))); + } + } +}; + +template +struct Deleter { + static void destroy(T* ptr, MemoryType memory_type) { + // Do nothing at run-time + static_assert( + std::is_trivially_destructible::value, + "Objects stored in unified_ptr must be trivially destructible."); + if (memory_type == MemoryType::kDevice || + memory_type == MemoryType::kUnified) { + checkCudaErrors( + cudaFree(const_cast(reinterpret_cast(ptr)))); + } else { + checkCudaErrors( + cudaFreeHost(const_cast(reinterpret_cast(ptr)))); + } + } +}; + +// Destructor. +template +unified_ptr::~unified_ptr() { + if (ptr_ != nullptr) { + (*ref_counter_)--; + if (*ref_counter_ <= 0) { + Deleter::destroy(ptr_, memory_type_); + delete ref_counter_; + ptr_ = nullptr; + ref_counter_ = nullptr; + } + } +} + +// Operators +template +unified_ptr& unified_ptr::operator=(const unified_ptr& other) { + reset(); + ptr_ = other.ptr_; + ref_counter_ = other.ref_counter_; + memory_type_ = other.memory_type_; + if (ptr_ != nullptr) { + (*ref_counter_)++; + } + return *this; +} + +template +unified_ptr& unified_ptr::operator=(unified_ptr&& other) { + reset(); + ptr_ = other.ptr_; + ref_counter_ = other.ref_counter_; + memory_type_ = other.memory_type_; + if (ptr_ != nullptr) { + (*ref_counter_)++; + } + return *this; +} + +template +template ::value, bool>> +unified_ptr::operator unified_ptr() const { + unified_ptr const_ptr; + if (ptr_ != nullptr) { + const_ptr.ptr_ = ptr_; + const_ptr.ref_counter_ = ref_counter_; + const_ptr.memory_type_ = memory_type_; + (*const_ptr.ref_counter_)++; + } + return const_ptr; +} + +// Operator bool +template +unified_ptr::operator bool() const { + return ptr_ != nullptr; +} + +// Operator comparison +template +bool unified_ptr::operator==(const unified_ptr& other) const { + return ptr_ == other.ptr_; +} + +template +bool unified_ptr::operator!=(const unified_ptr& other) const { + return ptr_ != other.ptr_; +} + +template +typename unified_ptr::T_noextent* unified_ptr::operator->() { + CHECK(memory_type_ != MemoryType::kDevice); + return ptr_; +} + +template +T& unified_ptr::operator*() { + CHECK(memory_type_ != MemoryType::kDevice); + return *ptr_; +} + +template +const typename unified_ptr::T_noextent* unified_ptr::operator->() const { + CHECK(memory_type_ != MemoryType::kDevice); + return ptr_; +} + +template +const T& unified_ptr::operator*() const { + CHECK(memory_type_ != MemoryType::kDevice); + return *ptr_; +} + +// Getters +template +typename unified_ptr::T_noextent* unified_ptr::get() { + return ptr_; +} + +template +const typename unified_ptr::T_noextent* unified_ptr::get() const { + return ptr_; +} + +// Reset +template +void unified_ptr::reset() { + if (ptr_ != nullptr) { + (*ref_counter_)--; + if (*ref_counter_ <= 0) { + Deleter::destroy(ptr_, memory_type_); + delete ref_counter_; + } + ptr_ = nullptr; + ref_counter_ = nullptr; + } +} + +// Cloner +template +struct Cloner { + typedef 
typename std::remove_cv::type T_nonconst; + static unified_ptr clone(const unified_ptr& original, + MemoryType memory_type, size_t) { + auto other = make_unified(memory_type); + cudaMemcpy(other.get(), original.get(), sizeof(T), cudaMemcpyDefault); + return other; + } +}; + +template +struct Cloner { + typedef typename std::remove_extent::type T_noextent; + typedef typename std::remove_cv::type T_nonconst; + static unified_ptr clone(const unified_ptr& original, + MemoryType memory_type, size_t size) { + auto other = make_unified(size, memory_type); + cudaMemcpy(other.get(), original.get(), sizeof(T_noextent) * size, + cudaMemcpyDefault); + return other; + } +}; + +template +unified_ptr::type> unified_ptr::clone() const { + return Cloner::clone(*this, memory_type_, size_); +} + +template +unified_ptr::type> unified_ptr::clone( + MemoryType memory_type) const { + return Cloner::clone(*this, memory_type, size_); +} + +// Hint to move to GPU or CPU. +template +void unified_ptr::toGPU() { + CHECK(memory_type_ == MemoryType::kUnified); + if (ptr_ == nullptr) { + return; + } + int device; + cudaGetDevice(&device); + checkCudaErrors(cudaMemPrefetchAsync(ptr_, sizeof(T), device)); +} + +template +void unified_ptr::toGPU(cudaStream_t cuda_stream) { + CHECK(memory_type_ == MemoryType::kUnified); + if (ptr_ == nullptr) { + return; + } + int device; + cudaGetDevice(&device); + checkCudaErrors(cudaMemPrefetchAsync(ptr_, sizeof(T), device, cuda_stream)); +} + +template +void unified_ptr::toCPU() { + CHECK(memory_type_ == MemoryType::kUnified); + if (ptr_ == nullptr) { + return; + } + checkCudaErrors(cudaMemPrefetchAsync(ptr_, sizeof(T), cudaCpuDeviceId)); +} + +template +void unified_ptr::preferGPU() { + CHECK(memory_type_ == MemoryType::kUnified); + int device; + cudaGetDevice(&device); + checkCudaErrors(cudaMemAdvise(ptr_, sizeof(T), + cudaMemAdviseSetPreferredLocation, device)); +} + +template +void unified_ptr::setZero() { + checkCudaErrors(cudaMemset(ptr_, 0, sizeof(T))); +} +} // namespace nvblox diff --git a/nvblox/include/nvblox/core/impl/unified_vector_impl.h b/nvblox/include/nvblox/core/impl/unified_vector_impl.h new file mode 100644 index 00000000..bc203415 --- /dev/null +++ b/nvblox/include/nvblox/core/impl/unified_vector_impl.h @@ -0,0 +1,298 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include +#include +#include +#include +#include + +#include "nvblox/core/cuda/error_check.cuh" + +namespace nvblox { + +// Constructors and destructors. 
+template +unified_vector::unified_vector(MemoryType memory_type) + : memory_type_(memory_type), + buffer_(nullptr), + buffer_size_(0), + buffer_capacity_(0) { + static_assert(std::is_default_constructible::value, + "Need to have a default constructor to use unified_vector."); + static_assert(std::is_trivially_destructible::value, + "Need to have a trivial destructor to use unified_vector."); +} + +template +unified_vector::unified_vector(size_t size, MemoryType memory_type) + : unified_vector(memory_type) { + resize(size); +} + +template +unified_vector::unified_vector(size_t size, const T& initial, + MemoryType memory_type) + : unified_vector(memory_type) { + CHECK_NE(static_cast(memory_type_), + static_cast(MemoryType::kDevice)) + << "Can't set initial values for device memory. Use copy instead."; + resize(size); + for (size_t i = 0; i < size; i++) { + buffer_[i] = initial; + } +} + +// Copy constructor. +template +unified_vector::unified_vector(const unified_vector& other, + MemoryType memory_type) + : unified_vector(memory_type) { + resize(other.size()); + + // Do a cuda memcpy on this, which will hopefully figure out the 2 data types + // correctly. + checkCudaErrors(cudaMemcpy(buffer_, other.data(), sizeof(T) * other.size(), + cudaMemcpyDefault)); +} + +template +unified_vector::unified_vector(const std::vector& other, + MemoryType memory_type) + : unified_vector(memory_type) { + resize(other.size()); + + checkCudaErrors(cudaMemcpy(buffer_, other.data(), sizeof(T) * other.size(), + cudaMemcpyDefault)); +} + +// Move constructor. +template +unified_vector::unified_vector(unified_vector&& other) + : memory_type_(other.memory_type_), + buffer_(other.buffer_), + buffer_size_(other.buffer_size_), + buffer_capacity_(other.buffer_capacity_) { + other.buffer_ = nullptr; + other.buffer_size_ = 0; + other.buffer_capacity_ = 0; +} + +template +unified_vector::~unified_vector() { + clear(); +} + +// Operators +template +T& unified_vector::operator[](size_t index) { + return buffer_[index]; +} + +template +const T& unified_vector::operator[](size_t index) const { + return buffer_[index]; +} + +// Copy assignment +template +unified_vector& unified_vector::operator=( + const unified_vector& other) { + resize(other.size()); + checkCudaErrors(cudaMemcpy(buffer_, other.data(), sizeof(T) * other.size(), + cudaMemcpyDefault)); + return *this; +} + +// Move assignment +template +unified_vector& unified_vector::operator=(unified_vector&& other) { + clear(); + buffer_ = other.buffer_; + buffer_size_ = other.buffer_size_; + buffer_capacity_ = other.buffer_capacity_; + other.buffer_ = nullptr; + other.buffer_size_ = 0; + other.buffer_capacity_ = 0; + return *this; +} + +template +unified_vector& unified_vector::operator=(const std::vector& other) { + resize(other.size()); + checkCudaErrors(cudaMemcpy(buffer_, other.data(), sizeof(T) * other.size(), + cudaMemcpyDefault)); + return *this; +} + +template +std::vector unified_vector::toVector() const { + std::vector vect(buffer_size_); + checkCudaErrors(cudaMemcpy(vect.data(), buffer_, sizeof(T) * buffer_size_, + cudaMemcpyDefault)); + return vect; +} + +// Get those raw pointers. +template +T* unified_vector::data() { + return buffer_; +} + +template +const T* unified_vector::data() const { + return buffer_; +} + +// Hint to move the memory to the GPU. 
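+// A usage sketch (hypothetical): fill on the host, then prefetch before
+// kernel launches; push_back grows capacity geometrically like std::vector.
+//
+//   unified_vector<float> values(MemoryType::kUnified);
+//   for (int i = 0; i < 1000; i++) {
+//     values.push_back(static_cast<float>(i));
+//   }
+//   values.toGPU();  // prefetch hint only; access remains valid either way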
+template +void unified_vector::toGPU() { + if (buffer_ == nullptr || buffer_capacity_ == 0) { + return; + } + int device = 0; + checkCudaErrors(cudaGetDevice(&device)); + checkCudaErrors( + cudaMemPrefetchAsync(buffer_, buffer_capacity_ * sizeof(T), device)); +} + +template +void unified_vector::toCPU() { + if (buffer_ == nullptr || buffer_capacity_ == 0) { + return; + } + checkCudaErrors(cudaMemPrefetchAsync(buffer_, buffer_capacity_ * sizeof(T), + cudaCpuDeviceId)); +} + +// Accessors +template +size_t unified_vector::capacity() const { + return buffer_capacity_; +} + +template +size_t unified_vector::size() const { + return buffer_size_; +} + +template +bool unified_vector::empty() const { + return buffer_size_ == 0; +} + +// Changing the size. +template +void unified_vector::reserve(size_t capacity) { + if (buffer_capacity_ < capacity) { + // Create a new buffer. + T* new_buffer = nullptr; + + if (memory_type_ == MemoryType::kUnified) { + checkCudaErrors(cudaMallocManaged(&new_buffer, sizeof(T) * capacity, + cudaMemAttachGlobal)); + } else if (memory_type_ == MemoryType::kDevice) { + checkCudaErrors(cudaMalloc(&new_buffer, sizeof(T) * capacity)); + } else { + checkCudaErrors(cudaMallocHost(&new_buffer, sizeof(T) * capacity)); + } + + if (buffer_ != nullptr) { + // Copy the old values to the new buffer. + checkCudaErrors(cudaMemcpy(new_buffer, buffer_, sizeof(T) * buffer_size_, + cudaMemcpyDefault)); + + // Delete the old buffer. + if (memory_type_ == MemoryType::kHost) { + checkCudaErrors(cudaFreeHost(reinterpret_cast(buffer_))); + } else { + checkCudaErrors(cudaFree(reinterpret_cast(buffer_))); + } + } + buffer_ = new_buffer; + buffer_capacity_ = capacity; + } +} + +template +void unified_vector::resize(size_t size) { + // Absolute no-op. + if (buffer_size_ == size) { + return; + } else { + reserve(size); + buffer_size_ = size; + } +} + +template +void unified_vector::clear() { + if (buffer_ != nullptr) { + if (memory_type_ == MemoryType::kHost) { + checkCudaErrors(cudaFreeHost(reinterpret_cast(buffer_))); + } else { + checkCudaErrors(cudaFree(reinterpret_cast(buffer_))); + } + } + buffer_ = nullptr; + buffer_size_ = 0; + buffer_capacity_ = 0; +} + +template +void unified_vector::push_back(const T& value) { + CHECK_NE(static_cast(memory_type_), + static_cast(MemoryType::kDevice)) + << "Can't use push_back on device memory."; + + if (buffer_capacity_ < buffer_size_ + 1) { + // Do the vector thing and reserve double. + reserve(std::max(buffer_size_ * 2, buffer_size_ + 1)); + } + // Add the new value to the end. + buffer_[buffer_size_] = value; + buffer_size_++; +} + +template +typename unified_vector::iterator unified_vector::begin() { + return iterator(&buffer_[0]); +} + +template +typename unified_vector::iterator unified_vector::end() { + return iterator(&buffer_[buffer_size_]); +} + +template +typename unified_vector::const_iterator unified_vector::cbegin() const { + return const_iterator(&buffer_[0]); +} + +template +typename unified_vector::const_iterator unified_vector::cend() const { + return const_iterator(&buffer_[buffer_size_]); +} + +template +void unified_vector::setZero() { + // It is safe to use cudaMemset since the memory is ALWAYS allocated with + // cudaMalloc. 
+  checkCudaErrors(cudaMemset(buffer_, 0, buffer_size_ * sizeof(T)));
+}
+
+}  // namespace nvblox
diff --git a/nvblox/include/nvblox/core/impl/variadic_template_tools_impl.h b/nvblox/include/nvblox/core/impl/variadic_template_tools_impl.h
new file mode 100644
index 00000000..b8d2322b
--- /dev/null
+++ b/nvblox/include/nvblox/core/impl/variadic_template_tools_impl.h
@@ -0,0 +1,43 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+namespace nvblox {
+
+// Unique types: true iff no two types in the pack are the same.
+template <typename... Ts>
+struct unique_types;
+
+template <typename T1, typename T2, typename... Ts>
+struct unique_types<T1, T2, Ts...> {
+  static constexpr bool value = unique_types<T1, T2>::value &&
+                                unique_types<T1, Ts...>::value &&
+                                unique_types<T2, Ts...>::value;
+};
+
+template <typename T1, typename T2>
+struct unique_types<T1, T2> {
+  static constexpr bool value = !std::is_same<T1, T2>::value;
+};
+
+template <typename T1>
+struct unique_types<T1> {
+  static constexpr bool value = true;
+};
+
+template <>
+struct unique_types<> {
+  static constexpr bool value = true;
+};
+
+}  // namespace nvblox
diff --git a/nvblox/include/nvblox/core/indexing.h b/nvblox/include/nvblox/core/indexing.h
new file mode 100644
index 00000000..2a8d8834
--- /dev/null
+++ b/nvblox/include/nvblox/core/indexing.h
@@ -0,0 +1,50 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include "nvblox/core/types.h"
+
+namespace nvblox {
+
+// Assuming a fixed-size voxel block, get the voxel index of a voxel at that
+// position within a block.
+__host__ __device__ inline Index3D getVoxelIndexFromPositionInLayer(
+    const float block_size, const Vector3f& position);
+
+__host__ __device__ inline Index3D getBlockIndexFromPositionInLayer(
+    const float block_size, const Vector3f& position);
+
+__host__ __device__ inline void getBlockAndVoxelIndexFromPositionInLayer(
+    const float block_size, const Vector3f& position, Index3D* block_idx,
+    Index3D* voxel_idx);
+
+/// Gets the position of the minimum corner (i.e., the corner closest to
+/// negative infinity along each axis).
+__host__ __device__ inline Vector3f getPositionFromBlockIndexAndVoxelIndex(
+    const float block_size, const Index3D& block_index,
+    const Index3D& voxel_index);
+
+__host__ __device__ inline Vector3f getPositionFromBlockIndex(
+    const float block_size, const Index3D& block_index);
+
+/// Gets the CENTER of the voxel.
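+/// Worked example (hypothetical numbers): with block_size = 0.4 m, i.e.
+/// voxel_size = 0.05 m, block (1, 0, 0) and voxel (2, 0, 0) have their
+/// minimum corner at x = 0.4 + 2 * 0.05 = 0.5 m, and this function returns
+/// the center at x = 0.5 + 0.025 = 0.525 m.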
+__host__ __device__ inline Vector3f getCenterPostionFromBlockIndexAndVoxelIndex( + const float block_size, const Index3D& block_index, + const Index3D& voxel_index); + +} // namespace nvblox + +#include "nvblox/core/impl/indexing_impl.h" diff --git a/nvblox/include/nvblox/core/interpolation_2d.h b/nvblox/include/nvblox/core/interpolation_2d.h new file mode 100644 index 00000000..3fdfdb17 --- /dev/null +++ b/nvblox/include/nvblox/core/interpolation_2d.h @@ -0,0 +1,52 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include "nvblox/core/image.h" +#include "nvblox/core/types.h" + +namespace nvblox { +namespace interpolation { + +// CPU interfaces +template +__host__ inline bool interpolate2DClosest(const Image& frame, + const Vector2f& u_px, + ElementType* value_interpolated_ptr); +template +__host__ inline bool interpolate2DLinear(const Image& frame, + const Vector2f& u_px, + ElementType* value_interpolated_ptr); +template +__host__ bool interpolate2D( + const Image& frame, const Vector2f& u_px, + ElementType* value_interpolated_ptr, + const InterpolationType type = InterpolationType::kLinear); + +// GPU interfaces +template +__host__ __device__ inline bool interpolate2DClosest( + const ElementType* frame, const Vector2f& u_px, const int rows, + const int cols, ElementType* value_interpolated_ptr); +template +__host__ __device__ inline bool interpolate2DLinear( + const ElementType* frame, const Vector2f& u_px, const int rows, + const int cols, ElementType* value_interpolated_ptr); + +} // namespace interpolation +} // namespace nvblox + +#include "nvblox/core/impl/interpolation_2d_impl.h" diff --git a/nvblox/include/nvblox/core/interpolation_3d.h b/nvblox/include/nvblox/core/interpolation_3d.h new file mode 100644 index 00000000..05e491e7 --- /dev/null +++ b/nvblox/include/nvblox/core/interpolation_3d.h @@ -0,0 +1,43 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/nvblox/include/nvblox/core/interpolation_3d.h b/nvblox/include/nvblox/core/interpolation_3d.h
new file mode 100644
index 00000000..05e491e7
--- /dev/null
+++ b/nvblox/include/nvblox/core/interpolation_3d.h
@@ -0,0 +1,43 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <vector>
+
+#include "nvblox/core/blox.h"
+#include "nvblox/core/common_names.h"
+#include "nvblox/core/layer.h"
+#include "nvblox/core/types.h"
+#include "nvblox/core/voxels.h"
+
+namespace nvblox {
+namespace interpolation {
+
+// Single points
+bool interpolateOnCPU(const Vector3f& p_L, const TsdfLayer& layer,
+                      float* distance);
+bool interpolateOnCPU(const Vector3f& p_L, const EsdfLayer& layer,
+                      float* distance);
+
+// Vectors of points
+template <typename VoxelType>
+void interpolateOnCPU(const std::vector<Vector3f>& points_L,
+                      const VoxelBlockLayer<VoxelType>& layer,
+                      std::vector<float>* distances_ptr,
+                      std::vector<bool>* success_flags_ptr);
+
+}  // namespace interpolation
+}  // namespace nvblox
+
+#include "nvblox/core/impl/interpolation_3d_impl.h"
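+
+// Usage sketch (illustrative, not part of the original header; assumes a
+// TsdfLayer named tsdf_layer that has already had data integrated into it):
+//
+//   const Vector3f p_L(1.0f, 2.0f, 0.5f);
+//   float distance;
+//   if (interpolation::interpolateOnCPU(p_L, tsdf_layer, &distance)) {
+//     // distance is interpolated from the voxels surrounding p_L.
+//   }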
diff --git a/nvblox/include/nvblox/core/iterator.h b/nvblox/include/nvblox/core/iterator.h
new file mode 100644
index 00000000..83af41c2
--- /dev/null
+++ b/nvblox/include/nvblox/core/iterator.h
@@ -0,0 +1,97 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <cuda_runtime.h>
+
+#include <cstddef>
+#include <iterator>
+
+namespace nvblox {
+
+template <typename T>
+struct RawIterator {
+  using iterator_category = std::forward_iterator_tag;
+  using difference_type = std::ptrdiff_t;
+  using value_type = T;
+  using pointer = T*;
+  using reference = T&;
+
+  __host__ __device__ RawIterator(pointer ptr) : ptr_(ptr) {}
+
+  __host__ __device__ reference operator*() const { return *ptr_; }
+  __host__ __device__ pointer operator->() { return ptr_; }
+  __host__ __device__ RawIterator& operator++() {
+    ptr_++;
+    return *this;
+  }
+  __host__ __device__ RawIterator& operator--() {
+    ptr_--;
+    return *this;
+  }
+  __host__ __device__ RawIterator operator++(int) {
+    RawIterator tmp = *this;
+    ++(*this);
+    return tmp;
+  }
+  __host__ __device__ RawIterator& operator+=(
+      const difference_type& movement) {
+    ptr_ += movement;
+    return *this;
+  }
+
+  // Assignment through the iterator; CUB requires this to use RawIterator as
+  // an output iterator.
+  __host__ __device__ RawIterator& operator=(const T& value) {
+    *ptr_ = value;
+    return *this;
+  }
+
+  __host__ __device__ RawIterator
+  operator+(const difference_type& movement) const {
+    pointer old_ptr = ptr_;
+    old_ptr += movement;
+    return RawIterator(old_ptr);
+  }
+  __host__ __device__ RawIterator
+  operator-(const difference_type& movement) const {
+    pointer old_ptr = ptr_;
+    old_ptr -= movement;
+    return RawIterator(old_ptr);
+  }
+
+  __host__ __device__ RawIterator
+  operator[](const difference_type& movement) const {
+    pointer old_ptr = ptr_;
+    old_ptr += movement;
+    return RawIterator(old_ptr);
+  }
+
+  // These two are needed for thrust:
+  __host__ __device__ operator T*() const { return ptr_; }
+  __host__ __device__ operator T() const { return *ptr_; }
+
+  __host__ __device__ friend bool operator==(const RawIterator& a,
+                                             const RawIterator& b) {
+    return a.ptr_ == b.ptr_;
+  };
+  __host__ __device__ friend bool operator!=(const RawIterator& a,
+                                             const RawIterator& b) {
+    return a.ptr_ != b.ptr_;
+  };
+
+ private:
+  pointer ptr_;
+};
+
+}  // namespace nvblox
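+
+// Usage sketch (illustrative, not part of the original header). RawIterator
+// simply walks a raw buffer, which lets iterator-based algorithms consume
+// device or unified memory:
+//
+//   float buffer[4] = {1.f, 2.f, 3.f, 4.f};
+//   RawIterator<float> it(buffer);
+//   RawIterator<float> end(buffer + 4);
+//   float sum = 0.f;
+//   for (; it != end; ++it) {
+//     sum += *it;  // sum == 10.f afterwards
+//   }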
diff --git a/nvblox/include/nvblox/core/layer.h b/nvblox/include/nvblox/core/layer.h
new file mode 100644
index 00000000..04d81460
--- /dev/null
+++ b/nvblox/include/nvblox/core/layer.h
@@ -0,0 +1,192 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "nvblox/core/blox.h"
+#include "nvblox/core/hash.h"
+#include "nvblox/core/traits.h"
+#include "nvblox/core/types.h"
+#include "nvblox/core/unified_ptr.h"
+#include "nvblox/gpu_hash/gpu_layer_view.h"
+
+namespace nvblox {
+
+class BaseLayer {
+ public:
+  typedef std::shared_ptr<BaseLayer> Ptr;
+  typedef std::shared_ptr<const BaseLayer> ConstPtr;
+
+  virtual ~BaseLayer() = default;
+
+  // Just an interface class
+};
+
+template <typename _BlockType>
+class BlockLayer : public BaseLayer {
+ public:
+  typedef std::shared_ptr<BlockLayer> Ptr;
+  typedef std::shared_ptr<const BlockLayer> ConstPtr;
+
+  // Check that custom block types implement allocate
+  static_assert(traits::has_allocate<_BlockType>::value,
+                "BlockType must implement an allocate() function.");
+
+  // Allows inspection of the contained BlockType through LayerType::BlockType
+  typedef _BlockType BlockType;
+  typedef BlockLayer<_BlockType> LayerType;
+  typedef GPULayerView<BlockType> GPULayerViewType;
+
+  typedef typename Index3DHashMapType<typename BlockType::Ptr>::type BlockHash;
+
+  BlockLayer() = delete;
+  BlockLayer(float block_size, MemoryType memory_type)
+      : block_size_(block_size),
+        memory_type_(memory_type),
+        gpu_layer_view_up_to_date_(false) {}
+  virtual ~BlockLayer() {}
+
+  // (Deep) Copy disabled
+  // NOTE(alexmillane): We could write these if needed in the future
+  BlockLayer(const BlockLayer& other) = delete;
+  BlockLayer& operator=(const BlockLayer& other) = delete;
+
+  // Move operations
+  BlockLayer(BlockLayer&& other) = default;
+  BlockLayer& operator=(BlockLayer&& other) = default;
+
+  // Block accessors by index.
+  typename BlockType::Ptr getBlockAtIndex(const Index3D& index);
+  typename BlockType::ConstPtr getBlockAtIndex(const Index3D& index) const;
+  typename BlockType::Ptr allocateBlockAtIndex(const Index3D& index);
+
+  // Block accessors by position.
+  typename BlockType::Ptr getBlockAtPosition(const Vector3f& position);
+  typename BlockType::ConstPtr getBlockAtPosition(
+      const Vector3f& position) const;
+  typename BlockType::Ptr allocateBlockAtPosition(const Vector3f& position);
+
+  // Get all block indices.
+  std::vector<Index3D> getAllBlockIndices() const;
+
+  // Check if allocated
+  bool isBlockAllocated(const Index3D& index) const;
+
+  __host__ __device__ float block_size() const { return block_size_; }
+  int numAllocatedBlocks() const { return blocks_.size(); }
+
+  // Clear the layer of all data
+  void clear() { blocks_.clear(); }
+
+  MemoryType memory_type() const { return memory_type_; }
+
+  // GPU Hash
+  // Note(alexmillane): The hash returned here is invalidated by calls to
+  // allocateBlock
+  GPULayerViewType getGpuLayerView() const;
+
+ protected:
+  const float block_size_;
+  const MemoryType memory_type_;
+
+  // CPU Hash (Index3D -> BlockType::Ptr)
+  BlockHash blocks_;
+
+  /// GPU Hash
+  /// NOTE(alexmillane):
+  /// - This hash is subservient to the CPU version. The layer has to copy the
+  ///   hash to GPU when it is requested.
+  /// - Cached such that if no blocks are allocated between requests, the
+  ///   GPULayerView is not recopied.
+  /// - Lazily allocated (space allocated on the GPU at first request)
+  /// - The "mutable" here is to enable caching in const member functions.
+  mutable bool gpu_layer_view_up_to_date_;
+  mutable std::unique_ptr<GPULayerViewType> gpu_layer_view_;
+};
+
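+// Usage sketch (illustrative, not part of the original header; uses the
+// VoxelBlockLayer specialization defined just below):
+//
+//   VoxelBlockLayer<TsdfVoxel> layer(0.05f, MemoryType::kUnified);
+//   const Index3D block_idx(0, 0, 0);
+//   auto block = layer.allocateBlockAtIndex(block_idx);
+//   CHECK(layer.isBlockAllocated(block_idx));
+//   std::vector<Index3D> indices = layer.getAllBlockIndices();
+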
+template <typename VoxelType>
+class VoxelBlockLayer : public BlockLayer<VoxelBlock<VoxelType>> {
+ public:
+  typedef std::shared_ptr<VoxelBlockLayer> Ptr;
+  typedef std::shared_ptr<const VoxelBlockLayer> ConstPtr;
+
+  using Base = BlockLayer<VoxelBlock<VoxelType>>;
+
+  using VoxelBlockType = VoxelBlock<VoxelType>;
+
+  /// Constructor
+  /// @param voxel_size The size of each voxel
+  /// @param memory_type In which type of memory the blocks in this layer
+  /// should be stored.
+  VoxelBlockLayer(float voxel_size, MemoryType memory_type)
+      : BlockLayer<VoxelBlockType>(VoxelBlockType::kVoxelsPerSide * voxel_size,
+                                   memory_type),
+        voxel_size_(voxel_size) {}
+  VoxelBlockLayer() = delete;
+  virtual ~VoxelBlockLayer() {}
+
+  // (Deep) Copy disabled
+  // NOTE(alexmillane): We could write these if needed in the future
+  VoxelBlockLayer(const VoxelBlockLayer& other) = delete;
+  VoxelBlockLayer& operator=(const VoxelBlockLayer& other) = delete;
+
+  // Move operations
+  VoxelBlockLayer(VoxelBlockLayer&& other) = default;
+  VoxelBlockLayer& operator=(VoxelBlockLayer&& other) = default;
+
+  /// Gets voxels by copy from a list of positions.
+  /// The positions are given with respect to the layer frame (L). The function
+  /// returns the closest voxels to the passed points.
+  /// If memory_type_ == kDevice, the function retrieves voxel data from the
+  /// GPU and transfers it to the CPU. Modifications to the returned voxel data
+  /// do not affect the layer (they're copies).
+  /// Note that this function performs a cudaMemcpy per voxel, so it will
+  /// likely be relatively slow.
+  /// @param positions_L query positions in layer frame
+  /// @param voxels_ptr a pointer to a vector of voxels where we'll store the
+  /// output
+  /// @param success_flags_ptr a pointer to a vector of flags indicating if we
+  /// were able to retrieve each voxel.
+  void getVoxels(const std::vector<Vector3f>& positions_L,
+                 std::vector<VoxelType>* voxels_ptr,
+                 std::vector<bool>* success_flags_ptr) const;
+
+  /// Returns the size of the voxels in this layer.
+  float voxel_size() const { return voxel_size_; }
+
+ private:
+  const float voxel_size_;
+};
+
+namespace traits {
+
+template <typename LayerType>
+struct is_voxel_layer : public std::false_type {};
+
+template <typename VoxelType>
+struct is_voxel_layer<VoxelBlockLayer<VoxelType>> : public std::true_type {};
+
+}  // namespace traits
+
+// Returns voxel size or block size based on the layer type (at compile time)
+template <typename LayerType>
+constexpr float sizeArgumentFromVoxelSize(float voxel_size);
+
+}  // namespace nvblox
+
+#include "nvblox/core/impl/layer_impl.h"
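+
+// Usage sketch (illustrative, not part of the original header; TsdfLayer is
+// assumed to be the VoxelBlockLayer<TsdfVoxel> alias from common_names.h):
+//
+//   std::vector<Vector3f> positions_L = {{0.1f, 0.1f, 0.1f}};
+//   std::vector<TsdfVoxel> voxels;
+//   std::vector<bool> success_flags;
+//   tsdf_layer.getVoxels(positions_L, &voxels, &success_flags);
+//   if (success_flags[0]) {
+//     // voxels[0].distance and voxels[0].weight are host-side copies.
+//   }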
diff --git a/nvblox/include/nvblox/core/layer_cake.h b/nvblox/include/nvblox/core/layer_cake.h
new file mode 100644
index 00000000..9b04c236
--- /dev/null
+++ b/nvblox/include/nvblox/core/layer_cake.h
@@ -0,0 +1,78 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <memory>
+#include <typeindex>
+#include <unordered_map>
+
+#include "nvblox/core/layer.h"
+#include "nvblox/core/types.h"
+
+namespace nvblox {
+
+class LayerCake {
+ public:
+  LayerCake() = default;
+  LayerCake(float voxel_size) : voxel_size_(voxel_size) {}
+
+  // Deep Copy (disallowed for now)
+  LayerCake(const LayerCake& other) = delete;
+  LayerCake& operator=(const LayerCake& other) = delete;
+
+  // Move
+  LayerCake(LayerCake&& other) = default;
+  LayerCake& operator=(LayerCake&& other) = default;
+
+  template <typename LayerType>
+  LayerType* add(MemoryType memory_type);
+
+  // Retrieve layers (as pointers)
+  template <typename LayerType>
+  LayerType* getPtr();
+  template <typename LayerType>
+  const LayerType* getConstPtr() const;
+
+  // Retrieve layers (as a reference; will fail if the layer doesn't exist)
+  template <typename LayerType>
+  const LayerType& get() const;
+
+  template <typename LayerType>
+  bool exists() const;
+
+  bool empty() const { return layers_.empty(); }
+
+  void clear() { layers_.clear(); }
+
+  // Factory accepting a list of LayerTypes (and MemoryTypes)
+  template <typename... LayerTypes>
+  static LayerCake create(float voxel_size, MemoryType memory_type);
+  template <typename... LayerTypes, typename... MemoryTypes>
+  static LayerCake create(float voxel_size, MemoryTypes... memory_type);
+
+ private:
+  // Params
+  float voxel_size_;
+
+  // Stored layers
+  // Note(alexmillane): Currently we restrict the cake to storing a single
+  // layer of each type.
+  std::unordered_map<std::type_index, std::unique_ptr<BaseLayer>> layers_;
+};
+
+}  // namespace nvblox
+
+#include "nvblox/core/impl/layer_cake_impl.h"
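+
+// Usage sketch (illustrative, not part of the original header):
+//
+//   LayerCake cake =
+//       LayerCake::create<TsdfLayer, ColorLayer>(0.05f, MemoryType::kDevice);
+//   if (cake.exists<TsdfLayer>()) {
+//     TsdfLayer* tsdf_layer = cake.getPtr<TsdfLayer>();
+//   }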
diff --git a/nvblox/include/nvblox/core/mapper.h b/nvblox/include/nvblox/core/mapper.h
new file mode 100644
index 00000000..c828c9ff
--- /dev/null
+++ b/nvblox/include/nvblox/core/mapper.h
@@ -0,0 +1,120 @@
+#pragma once
+
+#include <vector>
+
+#include "nvblox/core/blox.h"
+#include "nvblox/core/common_names.h"
+#include "nvblox/core/hash.h"
+#include "nvblox/core/layer.h"
+#include "nvblox/core/layer_cake.h"
+#include "nvblox/core/voxels.h"
+#include "nvblox/integrators/esdf_integrator.h"
+#include "nvblox/integrators/projective_color_integrator.h"
+#include "nvblox/integrators/projective_tsdf_integrator.h"
+#include "nvblox/mesh/mesh_integrator.h"
+
+namespace nvblox {
+
+// The mapper classes wrap layers and integrators together.
+// In the base class we only specify that a mapper should contain map layers
+// and leave it up to sub-classes to add functionality.
+class MapperBase {
+ public:
+  MapperBase() = default;
+
+  // Move
+  MapperBase(MapperBase&& other) = default;
+  MapperBase& operator=(MapperBase&& other) = default;
+
+ protected:
+  // Map layers
+  LayerCake layers_;
+};
+
+// The RgbdMapper class is what we consider the default (but extensible)
+// mapping behaviour in nvblox.
+// Contains:
+// - TsdfLayer, ColorLayer, EsdfLayer, MeshLayer
+// - Functions for integrating depth and color frames
+// - Functions for generating Meshes, ESDF, and ESDF-slices
+class RgbdMapper : public MapperBase {
+ public:
+  RgbdMapper() = delete;
+  RgbdMapper(float voxel_size_m, MemoryType memory_type = MemoryType::kDevice);
+  virtual ~RgbdMapper() {}
+
+  // Move
+  RgbdMapper(RgbdMapper&& other) = default;
+  RgbdMapper& operator=(RgbdMapper&& other) = default;
+
+  void integrateDepth(const DepthImage& depth_frame, const Transform& T_L_C,
+                      const Camera& camera);
+  void integrateColor(const ColorImage& color_frame, const Transform& T_L_C,
+                      const Camera& camera);
+
+  /// Updates the mesh blocks which require an update
+  /// @return The indices of the blocks that were updated in this call.
+  std::vector<Index3D> updateMesh();
+
+  /// Updates the Esdf blocks which require an update
+  /// @return The indices of the blocks that were updated in this call.
+  std::vector<Index3D> updateEsdf();
+  std::vector<Index3D> updateEsdfSlice(float slice_input_z_min,
+                                       float slice_input_z_max,
+                                       float slice_output_z);
+
+  // Const access to layers
+  const LayerCake& layers() const { return layers_; }
+  const TsdfLayer& tsdf_layer() const { return layers_.get<TsdfLayer>(); }
+  const ColorLayer& color_layer() const { return layers_.get<ColorLayer>(); }
+  const EsdfLayer& esdf_layer() const { return layers_.get<EsdfLayer>(); }
+  const MeshLayer& mesh_layer() const { return layers_.get<MeshLayer>(); }
+
+  // Non-const access to layers
+  LayerCake& layers() { return layers_; }
+  TsdfLayer& tsdf_layer() { return *layers_.getPtr<TsdfLayer>(); }
+  ColorLayer& color_layer() { return *layers_.getPtr<ColorLayer>(); }
+  EsdfLayer& esdf_layer() { return *layers_.getPtr<EsdfLayer>(); }
+  MeshLayer& mesh_layer() { return *layers_.getPtr<MeshLayer>(); }
+
+  // Const access to integrators
+  const ProjectiveTsdfIntegrator& tsdf_integrator() const {
+    return tsdf_integrator_;
+  }
+  const ProjectiveColorIntegrator& color_integrator() const {
+    return color_integrator_;
+  }
+  const MeshIntegrator& mesh_integrator() const { return mesh_integrator_; }
+  const EsdfIntegrator& esdf_integrator() const { return esdf_integrator_; }
+
+  // Non-const access to integrators
+  ProjectiveTsdfIntegrator& tsdf_integrator() { return tsdf_integrator_; }
+  ProjectiveColorIntegrator& color_integrator() { return color_integrator_; }
+  MeshIntegrator& mesh_integrator() { return mesh_integrator_; }
+  EsdfIntegrator& esdf_integrator() { return esdf_integrator_; }
+
+ protected:
+  // Params
+  float voxel_size_m_;
+
+  // Behaviour params
+  enum class EsdfMode { k3D, k2D, kUnset };
+  EsdfMode esdf_mode_ = EsdfMode::kUnset;
+
+  // Map layers (defined in base)
+  // For the RgbdMapper class: TSDF, Color, ESDF, Mesh
+  // LayerCake layers_;
+
+  // Integrators
+  ProjectiveTsdfIntegrator tsdf_integrator_;
+  ProjectiveColorIntegrator color_integrator_;
+  MeshIntegrator mesh_integrator_;
+  EsdfIntegrator esdf_integrator_;
+
+  // These queues keep track of the blocks which need to be updated on the
+  // next calls to updateMesh() and updateEsdf() respectively.
+  Index3DSet mesh_blocks_to_update_;
+  Index3DSet esdf_blocks_to_update_;
+};
+
+}  // namespace nvblox
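+
+// Usage sketch (illustrative, not part of the original header; depth_frame,
+// color_frame, T_L_C and camera are assumed to come from the sensor pipeline):
+//
+//   RgbdMapper mapper(0.05f);
+//   mapper.integrateDepth(depth_frame, T_L_C, camera);
+//   mapper.integrateColor(color_frame, T_L_C, camera);
+//   const std::vector<Index3D> mesh_blocks = mapper.updateMesh();
+//   const std::vector<Index3D> esdf_blocks = mapper.updateEsdf();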
diff --git a/nvblox/include/nvblox/core/traits.h b/nvblox/include/nvblox/core/traits.h
new file mode 100644
index 00000000..17bb32f7
--- /dev/null
+++ b/nvblox/include/nvblox/core/traits.h
@@ -0,0 +1,45 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <type_traits>
+#include <utility>
+
+#include "nvblox/core/types.h"
+
+namespace nvblox {
+namespace traits {
+
+// Trait which is true if the BlockType defines an allocate function
+// NOTE(alexmillane): This type trait is not totally complete as it doesn't
+// check the return type of the allocate function... it will do for now. The
+// intent is just to give people implementing new blocks a useful message when
+// they forget to implement this function.
+template <typename... Ts>
+struct make_void {
+  typedef void type;
+};
+template <typename... Ts>
+using void_t = typename make_void<Ts...>::type;
+
+template <typename BlockType, typename = void>
+struct has_allocate : std::false_type {};
+
+template <typename BlockType>
+struct has_allocate<
+    BlockType,
+    void_t<decltype(std::declval<BlockType>().allocate(MemoryType::kUnified))>>
+    : std::true_type {};
+
+}  // namespace traits
+}  // namespace nvblox
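+
+// Usage sketch (illustrative, not part of the original header; MyBlock is a
+// hypothetical custom block type):
+//
+//   struct MyBlock {
+//     typedef unified_ptr<MyBlock> Ptr;
+//     static Ptr allocate(MemoryType memory_type);
+//   };
+//   static_assert(traits::has_allocate<MyBlock>::value,
+//                 "BlockType must implement an allocate() function.");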
diff --git a/nvblox/include/nvblox/core/types.h b/nvblox/include/nvblox/core/types.h
new file mode 100644
index 00000000..e9609da8
--- /dev/null
+++ b/nvblox/include/nvblox/core/types.h
@@ -0,0 +1,59 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <cuda_runtime.h>
+
+#include <Eigen/Core>
+#include <Eigen/Geometry>
+
+#include <string>
+#include <vector>
+
+namespace nvblox {
+
+// Whether the storage or processing is happening on CPU, GPU, or any future
+// amazing hardware-accelerated platform.
+enum class DeviceType { kCPU, kGPU };
+
+// How GPU data is stored: in device-only, unified (host and device), or host
+// memory.
+// NOTE(alexmillane): tag: c++17, switch to constexpr when we move to c++17.
+enum class MemoryType { kDevice, kUnified, kHost };
+inline std::string toString(MemoryType memory_type) {
+  switch (memory_type) {
+    case MemoryType::kDevice:
+      return "kDevice";
+    case MemoryType::kUnified:
+      return "kUnified";
+    default:
+      return "kHost";
+  }
+}
+
+typedef Eigen::Vector3i Index3D;
+typedef Eigen::Vector2i Index2D;
+
+typedef Eigen::Vector3f Vector3f;
+typedef Eigen::Vector2f Vector2f;
+
+typedef Eigen::AlignedBox3f AxisAlignedBoundingBox;
+
+typedef Eigen::Isometry3f Transform;
+
+// Aligned Eigen containers
+template <typename Type>
+using AlignedVector = std::vector<Type, Eigen::aligned_allocator<Type>>;
+
+enum class InterpolationType { kNearestNeighbor, kLinear, kMax, kMin };
+
+}  // namespace nvblox
diff --git a/nvblox/include/nvblox/core/unified_ptr.h b/nvblox/include/nvblox/core/unified_ptr.h
new file mode 100644
index 00000000..c8adeae8
--- /dev/null
+++ b/nvblox/include/nvblox/core/unified_ptr.h
@@ -0,0 +1,188 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <cuda_runtime.h>
+#include <glog/logging.h>
+
+#include <atomic>
+#include <type_traits>
+
+#include "nvblox/core/types.h"
+
+namespace nvblox {
+
+// shared_ptr for device, unified memory, and pinned host memory
+// Things to be aware of
+// - Single objects
+//   - Constructor and Destructor are not called when memory_type==kDevice
+//     (these are CPU functions). Therefore unified_ptr generates an error if
+//     used in device mode with non-trivially destructible types.
+//   - Both Constructor and Destructor are called when memory_type==kUnified ||
+//     kHost.
+// - Arrays
+//   - Default Constructor called when storing arrays in kUnified || kHost.
+//   - No Constructor called in kDevice mode.
+//   - No destructor called for ANY memory setting so we make a static check
+//     that objects are trivially destructible.
+template <typename T>
+class unified_ptr {
+ public:
+  typedef typename std::remove_extent<T>::type T_noextent;
+  typedef typename std::remove_cv<T>::type T_nonconst;
+
+  static constexpr MemoryType kDefaultMemoryType = MemoryType::kUnified;
+
+  unified_ptr();
+  explicit unified_ptr(T_noextent* ptr, MemoryType memory_type,
+                       size_t size = 1);
+  unified_ptr(const unified_ptr& other);
+  unified_ptr(unified_ptr&& other);
+  ~unified_ptr();
+
+  // Operator =
+  unified_ptr& operator=(const unified_ptr& other);
+  unified_ptr& operator=(unified_ptr&& other);
+
+  // Operator bool
+  operator bool() const;
+  // Operator comparison
+  bool operator==(const unified_ptr& other) const;
+  bool operator!=(const unified_ptr& other) const;
+
+  // Operator dereference
+  T_noextent* operator->();
+  const T_noextent* operator->() const;
+  T& operator*();
+  const T& operator*() const;
+
+  // Operator array access
+  // Only enabled if the underlying type is an array
+  template <typename T1 = T,
+            std::enable_if_t<std::is_array<T1>::value, bool> = true>
+  T_noextent& operator[](size_t i) {
+    CHECK(memory_type_ != MemoryType::kDevice);
+    return ptr_[i];
+  }
+  template <typename T1 = T,
+            std::enable_if_t<std::is_array<T1>::value, bool> = true>
+  const T_noextent& operator[](size_t i) const {
+    CHECK(memory_type_ != MemoryType::kDevice);
+    return ptr_[i];
+  }
+
+  // Operator convert
+  // Only enabled if the base type is NOT const, otherwise adds a second
+  // trivial converter.
+  template <typename T2 = T,
+            std::enable_if_t<!std::is_const<T2>::value, bool> = true>
+  operator unified_ptr<const T2>() const;
+
+  // Get the raw pointer.
+  T_noextent* get();
+  const T_noextent* get() const;
+
+  // Reset the pointer to point to nothing.
+  void reset();
+
+  // Copy the underlying object (potentially to another memory location)
+  // NOTE: This is implemented as a memcpy at the pointed to location.
+  unified_ptr clone() const;
+  unified_ptr clone(MemoryType memory_type) const;
+
+  // Hint to move the memory to the GPU or CPU.
+  void toGPU();
+  void toGPU(cudaStream_t cuda_stream);
+  void toCPU();
+  // Advise CUDA to prefer the GPU for this memory. Should reduce paging fails.
+  void preferGPU();
+
+  MemoryType memory_type() const { return memory_type_; }
+
+  // Helper function to memset all the memory to 0.
+  void setZero();
+
+  friend class unified_ptr<T_nonconst>;
+  friend class unified_ptr<const T_nonconst>;
+
+ private:
+  MemoryType memory_type_;
+  size_t size_;
+
+  T_noextent* ptr_;
+
+  mutable std::atomic<int>* ref_counter_;
+};
+
+// Comparison with nullptr
+template <typename T>
+bool operator==(const unified_ptr<T>& lhs, std::nullptr_t) noexcept {
+  return !lhs;
+}
+template <typename T>
+bool operator==(std::nullptr_t, const unified_ptr<T>& rhs) noexcept {
+  return !rhs;
+}
+template <typename T>
+bool operator!=(const unified_ptr<T>& lhs, std::nullptr_t) noexcept {
+  return lhs;
+}
+template <typename T>
+bool operator!=(std::nullptr_t, const unified_ptr<T>& rhs) noexcept {
+  return rhs;
+}
+
+// These are used to specialize make_unified on the return type, which is
+// needed to generate class templates for non-array and array types. See gcc
+// here:
+// https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/unique_ptr.h#L943
+template <typename T>
+struct _Unified_if {
+  typedef unified_ptr<T> _Single_object;
+};
+
+template <typename T>
+struct _Unified_if<T[]> {
+  typedef unified_ptr<T[]> _Unknown_bound;
+};
+
+template <typename T, size_t Bound>
+struct _Unified_if<T[Bound]> {
+  typedef void _Known_bound;
+};
+
+// Single object, default storage location
+template <typename T, typename... Args>
+typename _Unified_if<T>::_Single_object make_unified(Args&&... args);
+
+// Specialization, Single object, specified storage location
+template <typename T, typename... Args>
+typename _Unified_if<T>::_Single_object make_unified(MemoryType memory_type,
+                                                     Args&&... args);
+
+// Specialization for arrays, default storage location
+template <typename T>
+typename _Unified_if<T>::_Unknown_bound make_unified(std::size_t size);
+
+// Specialization for arrays, specified storage location
+template <typename T>
+typename _Unified_if<T>::_Unknown_bound make_unified(std::size_t size,
+                                                     MemoryType memory_type);
+
+template <typename T, typename... Args>
+typename _Unified_if<T>::_Known_bound make_unified(Args&&... args) = delete;
+
+}  // namespace nvblox
+
+#include "nvblox/core/impl/unified_ptr_impl.h"
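+
+// Usage sketch (illustrative, not part of the original header):
+//
+//   // Single object in unified memory, constructed with a value:
+//   auto value = make_unified<int>(MemoryType::kUnified, 42);
+//   CHECK_EQ(*value, 42);  // accessible from the host
+//
+//   // Array of 100 floats in device-only memory, then zeroed:
+//   auto array = make_unified<float[]>(100, MemoryType::kDevice);
+//   array.setZero();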
diff --git a/nvblox/include/nvblox/core/unified_vector.h b/nvblox/include/nvblox/core/unified_vector.h
new file mode 100644
index 00000000..4cab8e24
--- /dev/null
+++ b/nvblox/include/nvblox/core/unified_vector.h
@@ -0,0 +1,177 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <cuda_runtime.h>
+
+#include <type_traits>
+#include <vector>
+
+#include "nvblox/core/iterator.h"
+#include "nvblox/core/types.h"
+
+namespace nvblox {
+
+/// Unified-memory CUDA vector that should only be used on trivial types
+/// as the constructors and destructors *are NOT called*.
+template <typename T>
+class unified_vector {
+ public:
+  typedef RawIterator<T> iterator;
+  typedef RawIterator<const T> const_iterator;
+
+  static constexpr MemoryType kDefaultMemoryType = MemoryType::kUnified;
+
+  // Static asserts on the type.
+  static_assert(
+      std::is_default_constructible<T>::value,
+      "Objects stored in unified vector should be default constructible.");
+
+  unified_vector(MemoryType memory_type = kDefaultMemoryType);
+  unified_vector(size_t size, MemoryType memory_type = kDefaultMemoryType);
+  unified_vector(size_t size, const T& initial,
+                 MemoryType memory_type = kDefaultMemoryType);
+  // Copy constructor.
+  unified_vector(const unified_vector<T>& other,
+                 MemoryType memory_type = kDefaultMemoryType);
+  unified_vector(const std::vector<T>& other,
+                 MemoryType memory_type = kDefaultMemoryType);
+
+  // Move constructor.
+  unified_vector(unified_vector<T>&& other);
+
+  // Destructor
+  virtual ~unified_vector();
+
+  // Operators.
+  T& operator[](size_t index);
+  const T& operator[](size_t index) const;
+  unified_vector& operator=(const unified_vector<T>& other);
+  unified_vector& operator=(unified_vector<T>&& other);
+  unified_vector& operator=(const std::vector<T>& other);
+
+  // Convert to an std::vector. Creates a copy.
+  std::vector<T> toVector() const;
+
+  // Get raw pointers. This also works for GPU pointers.
+  T* data();
+  const T* data() const;
+
+  // Hint to move the memory to the GPU or CPU.
+  void toGPU();
+  void toCPU();
+
+  // Access information.
+  size_t capacity() const;
+  size_t size() const;
+  bool empty() const;
+
+  // Changing the size.
+  void reserve(size_t capacity);
+  void resize(size_t size);
+  void clear();
+
+  // Adding elements.
+  void push_back(const T& value);
+
+  // Iterator access.
+  iterator begin();
+  iterator end();
+  const_iterator begin() const { return cbegin(); }
+  const_iterator end() const { return cend(); }
+  const_iterator cbegin() const;
+  const_iterator cend() const;
+
+  // Get the memory type.
+  MemoryType memory_type() const { return memory_type_; }
+
+  // Set the entire *memory* of the vector to zero.
+  void setZero();
+
+ private:
+  MemoryType memory_type_;
+
+  T* buffer_;
+  size_t buffer_size_;
+  size_t buffer_capacity_;
+};
+
+template <typename T>
+class device_vector : public unified_vector<T> {
+ public:
+  device_vector() : unified_vector<T>(MemoryType::kDevice) {}
+  device_vector(MemoryType memory_type) = delete;
+  device_vector(size_t size) : unified_vector<T>(size, MemoryType::kDevice) {}
+  device_vector(size_t size, const T& initial)
+      : unified_vector<T>(size, initial, MemoryType::kDevice) {}
+
+  // Conversions from base
+  device_vector(const unified_vector<T>& other)
+      : unified_vector<T>(other, MemoryType::kDevice) {}
+  device_vector(unified_vector<T>&& other)
+      : unified_vector<T>(std::move(other), MemoryType::kDevice) {}
+  device_vector& operator=(const unified_vector<T>& other) {
+    unified_vector<T>::operator=(other);
+    return *this;
+  }
+  device_vector& operator=(unified_vector<T>&& other) {
+    unified_vector<T>::operator=(std::move(other));
+    return *this;
+  }
+
+  // From std::vector
+  device_vector(const std::vector<T>& other)
+      : unified_vector<T>(other, MemoryType::kDevice) {}
+  device_vector& operator=(const std::vector<T>& other) {
+    unified_vector<T>::operator=(other);
+    return *this;
+  }
+};
+
+template <typename T>
+class host_vector : public unified_vector<T> {
+ public:
+  host_vector() : unified_vector<T>(MemoryType::kHost) {}
+  host_vector(MemoryType memory_type) = delete;
+  host_vector(size_t size) : unified_vector<T>(size, MemoryType::kHost) {}
+  host_vector(size_t size, const T& initial)
+      : unified_vector<T>(size, initial, MemoryType::kHost) {}
+
+  // Conversions from base
+  host_vector(const unified_vector<T>& other)
+      : unified_vector<T>(other, MemoryType::kHost) {}
+  host_vector(unified_vector<T>&& other)
+      : unified_vector<T>(std::move(other), MemoryType::kHost) {}
+  host_vector& operator=(const unified_vector<T>& other) {
+    unified_vector<T>::operator=(other);
+    return *this;
+  }
+  host_vector& operator=(unified_vector<T>&& other) {
+    unified_vector<T>::operator=(std::move(other));
+    return *this;
+  }
+
+  // From std::vector
+  host_vector(const std::vector<T>& other)
+      : unified_vector<T>(other, MemoryType::kHost) {}
+  host_vector& operator=(const std::vector<T>& other) {
+    unified_vector<T>::operator=(other);
+    return *this;
+  }
+};
+
+}  // namespace nvblox
+
+#include "nvblox/core/impl/unified_vector_impl.h"
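+
+// Usage sketch (illustrative, not part of the original header):
+//
+//   unified_vector<int> vec(MemoryType::kUnified);
+//   for (int i = 0; i < 10; i++) {
+//     vec.push_back(i);
+//   }
+//   // vec.data() can be passed directly to a CUDA kernel, e.g.:
+//   // someKernel<<<1, 32>>>(vec.data(), vec.size());
+//
+//   // Deep copy into device-only memory:
+//   device_vector<int> vec_device(vec);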
diff --git a/nvblox/include/nvblox/core/variadic_template_tools.h b/nvblox/include/nvblox/core/variadic_template_tools.h
new file mode 100644
index 00000000..8fe5d870
--- /dev/null
+++ b/nvblox/include/nvblox/core/variadic_template_tools.h
@@ -0,0 +1,26 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+namespace nvblox {
+
+// Unique types
+template <typename... Types>
+struct unique_types;
+
+}  // namespace nvblox
+
+#include "nvblox/core/impl/variadic_template_tools_impl.h"
diff --git a/nvblox/include/nvblox/core/voxels.h b/nvblox/include/nvblox/core/voxels.h
new file mode 100644
index 00000000..9a98bf1e
--- /dev/null
+++ b/nvblox/include/nvblox/core/voxels.h
@@ -0,0 +1,56 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <Eigen/Core>
+
+#include "nvblox/core/color.h"
+
+namespace nvblox {
+
+struct TsdfVoxel {
+  // Signed projective distance of the voxel from a surface.
+  float distance = 0.0f;
+  // How many observations/how confident we are in this observation.
+  float weight = 0.0f;
+};
+
+struct EsdfVoxel {
+  // TODO(helen): optimize the memory layout here.
+  // Cached squared distance towards the parent.
+  float squared_distance_vox = 0.0f;
+  // Direction towards the parent, *in units of voxels*.
+  Eigen::Vector3i parent_direction = Eigen::Vector3i::Zero();
+  // Whether this voxel is inside the surface or not.
+  bool is_inside = false;
+  // Whether this voxel has been observed.
+  bool observed = false;
+  // Whether this voxel is a "site": i.e., near the zero-crossing and is
+  // eligible to be considered a parent.
+  bool is_site = false;
+};
+
+struct ColorVoxel {
+  Color color = Color::Gray();
+  // How many observations/how confident we are in this observation.
+  float weight = 0.0f;
+};
+
+struct FreespaceVoxel {
+  bool free = true;
+};
+
+}  // namespace nvblox
diff --git a/nvblox/include/nvblox/datasets/external/stb_image.h b/nvblox/include/nvblox/datasets/external/stb_image.h
new file mode 100644
index 00000000..a1f18190
--- /dev/null
+++ b/nvblox/include/nvblox/datasets/external/stb_image.h
@@ -0,0 +1,7890 @@
+/* stb_image - v2.27 - public domain image loader - http://nothings.org/stb
+   no warranty implied; use at your own risk
+
+   Do this:
+      #define STB_IMAGE_IMPLEMENTATION
+   before you include this file in *one* C or C++ file to create the
+   implementation.
+
+   // i.e. it should look like this:
+   #include ...
+   #include ...
+   #include ...
+   #define STB_IMAGE_IMPLEMENTATION
+   #include "stb_image.h"
+
+   You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
+ And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free + + + QUICK NOTES: + Primarily of interest to game developers and other people who can + avoid problematic images and only need the trivial interface + + JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) + PNG 1/2/4/8/16-bit-per-channel + + TGA (not sure what subset, if a subset) + BMP non-1bpp, non-RLE + PSD (composited view only, no extra channels, 8/16 bit-per-channel) + + GIF (*comp always reports as 4-channel) + HDR (radiance rgbE format) + PIC (Softimage PIC) + PNM (PPM and PGM binary only) + + Animated GIF still needs a proper API, but here's one way to do it: + http://gist.github.com/urraka/685d9a6340b26b830d49 + + - decode from memory or through FILE (define STBI_NO_STDIO to remove code) + - decode from arbitrary I/O callbacks + - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON) + + Full documentation under "DOCUMENTATION" below. + + +LICENSE + + See end of file for license information. + +RECENT REVISION HISTORY: + + 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes + 2.26 (2020-07-13) many minor fixes + 2.25 (2020-02-02) fix warnings + 2.24 (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically + 2.23 (2019-08-11) fix clang static analysis warning + 2.22 (2019-03-04) gif fixes, fix warnings + 2.21 (2019-02-25) fix typo in comment + 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings + 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes + 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 + RGB-format JPEG; remove white matting in PSD; + allocate large structures on the stack; + correct channel count for PNG & BMP + 2.10 (2016-01-22) avoid warning introduced in 2.09 + 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED + + See end of file for full revision history. 
+ + + ============================ Contributors ========================= + + Image formats Extensions, features + Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info) + Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info) + Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG) + Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks) + Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) + Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip) + Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) + github:urraka (animated gif) Junggon Kim (PNM comments) + Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA) + socks-the-fox (16-bit PNG) + Jeremy Sawicki (handle all ImageNet JPGs) + Optimizations & bugfixes Mikhail Morozov (1-bit BMP) + Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query) + Arseny Kapoulkine Simon Breuss (16-bit PNM) + John-Mark Allen + Carmelo J Fdez-Aguera + + Bug & warning fixes + Marc LeBlanc David Woo Guillaume George Martins Mozeiko + Christpher Lloyd Jerry Jansson Joseph Thomson Blazej Dariusz Roszkowski + Phil Jordan Dave Moore Roy Eltham + Hayaki Saito Nathan Reed Won Chun + Luke Graham Johan Duparc Nick Verigakis the Horde3D community + Thomas Ruf Ronny Chevalier github:rlyeh + Janez Zemva John Bartholomew Michal Cichon github:romigrou + Jonathan Blow Ken Hamada Tero Hanninen github:svdijk + Eugene Golushkov Laurent Gomila Cort Stratton github:snagar + Aruelien Pocheville Sergio Gonzalez Thibault Reuille github:Zelex + Cass Everitt Ryamond Barbiero github:grim210 + Paul Du Bois Engin Manap Aldo Culquicondor github:sammyhw + Philipp Wiesemann Dale Weiler Oriol Ferrer Mesia github:phprus + Josh Tobin Matthew Gregan github:poppolopoppo + Julian Raschke Gregory Mullen Christian Floisand github:darealshinji + Baldur Karlsson Kevin Schmidt JR Smith github:Michaelangel007 + Brad Weinberger Matvey Cherevko github:mosra + Luca Sas Alexander Veselov Zack Middleton [reserved] + Ryan C. Gordon [reserved] [reserved] + DO NOT ADD YOUR NAME HERE + + Jacko Dirks + + To add your name to the credits, pick a random blank space in the middle and fill it. + 80% of merge conflicts on stb PRs are due to people adding their name at the end + of the credits. +*/ + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H + +// DOCUMENTATION +// +// Limitations: +// - no 12-bit-per-channel JPEG +// - no JPEGs with arithmetic coding +// - GIF always returns *comp=4 +// +// Basic usage (see HDR discussion below for HDR usage): +// int x,y,n; +// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +// // ... process data if not NULL ... +// // ... x = width, y = height, n = # 8-bit components per pixel ... +// // ... replace '0' with '1'..'4' to force that many components per pixel +// // ... but 'n' will always be the number that it would have been if you said 0 +// stbi_image_free(data) +// +// Standard parameters: +// int *x -- outputs image width in pixels +// int *y -- outputs image height in pixels +// int *channels_in_file -- outputs # of image components in image file +// int desired_channels -- if non-zero, # of image components requested in result +// +// The return value from an image loader is an 'unsigned char *' which points +// to the pixel data, or NULL on an allocation failure or if the image is +// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels, +// with each pixel consisting of N interleaved 8-bit components; the first +// pixel pointed to is top-left-most in the image. 
There is no padding between +// image scanlines or between pixels, regardless of format. The number of +// components N is 'desired_channels' if desired_channels is non-zero, or +// *channels_in_file otherwise. If desired_channels is non-zero, +// *channels_in_file has the number of components that _would_ have been +// output otherwise. E.g. if you set desired_channels to 4, you will always +// get RGBA output, but you can check *channels_in_file to see if it's trivially +// opaque because e.g. there were only 3 channels in the source image. +// +// An output image with N components has the following components interleaved +// in this order in each pixel: +// +// N=#comp components +// 1 grey +// 2 grey, alpha +// 3 red, green, blue +// 4 red, green, blue, alpha +// +// If image loading fails for any reason, the return value will be NULL, +// and *x, *y, *channels_in_file will be unchanged. The function +// stbi_failure_reason() can be queried for an extremely brief, end-user +// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS +// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// more user-friendly ones. +// +// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. +// +// To query the width, height and component count of an image without having to +// decode the full file, you can use the stbi_info family of functions: +// +// int x,y,n,ok; +// ok = stbi_info(filename, &x, &y, &n); +// // returns ok=1 and sets x, y, n if image is a supported format, +// // 0 otherwise. +// +// Note that stb_image pervasively uses ints in its public API for sizes, +// including sizes of memory buffers. This is now part of the API and thus +// hard to change without causing breakage. As a result, the various image +// loaders all have certain limits on image size; these differ somewhat +// by format but generally boil down to either just under 2GB or just under +// 1GB. When the decoded image would be larger than this, stb_image decoding +// will fail. +// +// Additionally, stb_image will reject image files that have any of their +// dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS, +// which defaults to 2**24 = 16777216 pixels. Due to the above memory limit, +// the only way to have an image with such dimensions load correctly +// is for it to have a rather extreme aspect ratio. Either way, the +// assumption here is that such larger images are likely to be malformed +// or malicious. If you do need to load an image with individual dimensions +// larger than that, and it still fits in the overall size limit, you can +// #define STBI_MAX_DIMENSIONS on your own to be something larger. +// +// =========================================================================== +// +// UNICODE: +// +// If compiling for Windows and you wish to use Unicode filenames, compile +// with +// #define STBI_WINDOWS_UTF8 +// and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert +// Windows wchar_t filenames to utf8. +// +// =========================================================================== +// +// Philosophy +// +// stb libraries are designed with the following priorities: +// +// 1. easy to use +// 2. easy to maintain +// 3. good performance +// +// Sometimes I let "good performance" creep up in priority over "easy to maintain", +// and for best performance I may provide less-easy-to-use APIs that give higher +// performance, in addition to the easy-to-use ones. 
Nevertheless, it's important +// to keep in mind that from the standpoint of you, a client of this library, +// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all. +// +// Some secondary priorities arise directly from the first two, some of which +// provide more explicit reasons why performance can't be emphasized. +// +// - Portable ("ease of use") +// - Small source code footprint ("easy to maintain") +// - No dependencies ("ease of use") +// +// =========================================================================== +// +// I/O callbacks +// +// I/O callbacks allow you to read from arbitrary sources, like packaged +// files or some other source. Data read from callbacks are processed +// through a small internal buffer (currently 128 bytes) to try to reduce +// overhead. +// +// The three functions you must define are "read" (reads some bytes of data), +// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end). +// +// =========================================================================== +// +// SIMD support +// +// The JPEG decoder will try to automatically use SIMD kernels on x86 when +// supported by the compiler. For ARM Neon support, you must explicitly +// request it. +// +// (The old do-it-yourself SIMD API is no longer supported in the current +// code.) +// +// On x86, SSE2 will automatically be used when available based on a run-time +// test; if not, the generic C versions are used as a fall-back. On ARM targets, +// the typical path is to have separate builds for NEON and non-NEON devices +// (at least this is true for iOS and Android). Therefore, the NEON support is +// toggled by a build flag: define STBI_NEON to get NEON loops. +// +// If for some reason you do not want to use any of SIMD code, or if +// you have issues compiling it, you can disable it entirely by +// defining STBI_NO_SIMD. +// +// =========================================================================== +// +// HDR image support (disable by defining STBI_NO_HDR) +// +// stb_image supports loading HDR images in general, and currently the Radiance +// .HDR file format specifically. You can still load any file through the existing +// interface; if you attempt to load an HDR file, it will be automatically remapped +// to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// both of these constants can be reconfigured through this interface: +// +// stbi_hdr_to_ldr_gamma(2.2f); +// stbi_hdr_to_ldr_scale(1.0f); +// +// (note, do not use _inverse_ constants; stbi_image will invert them +// appropriately). 
+// +// Additionally, there is a new, parallel interface for loading files as +// (linear) floats to preserve the full dynamic range: +// +// float *data = stbi_loadf(filename, &x, &y, &n, 0); +// +// If you load LDR images through this interface, those images will +// be promoted to floating point values, run through the inverse of +// constants corresponding to the above: +// +// stbi_ldr_to_hdr_scale(1.0f); +// stbi_ldr_to_hdr_gamma(2.2f); +// +// Finally, given a filename (or an open file or memory block--see header +// file for details) containing image data, you can query for the "most +// appropriate" interface to use (that is, whether the image is HDR or +// not), using: +// +// stbi_is_hdr(char *filename); +// +// =========================================================================== +// +// iPhone PNG support: +// +// We optionally support converting iPhone-formatted PNGs (which store +// premultiplied BGRA) back to RGB, even though they're internally encoded +// differently. To enable this conversion, call +// stbi_convert_iphone_png_to_rgb(1). +// +// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per +// pixel to remove any premultiplied alpha *only* if the image file explicitly +// says there's premultiplied data (currently only happens in iPhone images, +// and only if iPhone convert-to-rgb processing is on). +// +// =========================================================================== +// +// ADDITIONAL CONFIGURATION +// +// - You can suppress implementation of any of the decoders to reduce +// your code footprint by #defining one or more of the following +// symbols before creating the implementation. +// +// STBI_NO_JPEG +// STBI_NO_PNG +// STBI_NO_BMP +// STBI_NO_PSD +// STBI_NO_TGA +// STBI_NO_GIF +// STBI_NO_HDR +// STBI_NO_PIC +// STBI_NO_PNM (.ppm and .pgm) +// +// - You can request *only* certain decoders and suppress all other ones +// (this will be more forward-compatible, as addition of new decoders +// doesn't require you to disable them explicitly): +// +// STBI_ONLY_JPEG +// STBI_ONLY_PNG +// STBI_ONLY_BMP +// STBI_ONLY_PSD +// STBI_ONLY_TGA +// STBI_ONLY_GIF +// STBI_ONLY_HDR +// STBI_ONLY_PIC +// STBI_ONLY_PNM (.ppm and .pgm) +// +// - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still +// want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB +// +// - If you define STBI_MAX_DIMENSIONS, stb_image will reject images greater +// than that size (in either width or height) without further processing. +// This is to let programs in the wild set an upper bound to prevent +// denial-of-service attacks on untrusted data, as one could generate a +// valid image of gigantic dimensions and force stb_image to allocate a +// huge block of memory and spend disproportionate time decoding it. By +// default this is set to (1 << 24), which is 16777216, but that's still +// very big. 
+ +#ifndef STBI_NO_STDIO +#include +#endif // STBI_NO_STDIO + +#define STBI_VERSION 1 + +enum +{ + STBI_default = 0, // only used for desired_channels + + STBI_grey = 1, + STBI_grey_alpha = 2, + STBI_rgb = 3, + STBI_rgb_alpha = 4 +}; + +#include +typedef unsigned char stbi_uc; +typedef unsigned short stbi_us; + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef STBIDEF +#ifdef STB_IMAGE_STATIC +#define STBIDEF static +#else +#define STBIDEF extern +#endif +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// PRIMARY API - works on images of any type +// + +// +// load image by filename, open file, or memory buffer +// + +typedef struct +{ + int (*read) (void *user,char *data,int size); // fill 'data' with 'size' bytes. return number of bytes actually read + void (*skip) (void *user,int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative + int (*eof) (void *user); // returns nonzero if we are at end of file/data +} stbi_io_callbacks; + +//////////////////////////////////// +// +// 8-bits-per-channel interface +// + +STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels); + +#ifndef STBI_NO_STDIO +STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +// for stbi_load_from_file, file pointer is left pointing immediately after image +#endif + +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +#endif + +#ifdef STBI_WINDOWS_UTF8 +STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input); +#endif + +//////////////////////////////////// +// +// 16-bits-per-channel interface +// + +STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + +#ifndef STBI_NO_STDIO +STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +#endif + +//////////////////////////////////// +// +// float-per-channel interface +// +#ifndef STBI_NO_LINEAR + STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + + #ifndef STBI_NO_STDIO + STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); + #endif +#endif + +#ifndef STBI_NO_HDR + STBIDEF void stbi_hdr_to_ldr_gamma(float gamma); + STBIDEF void stbi_hdr_to_ldr_scale(float scale); +#endif // STBI_NO_HDR + +#ifndef STBI_NO_LINEAR + STBIDEF void stbi_ldr_to_hdr_gamma(float gamma); + STBIDEF void 
stbi_ldr_to_hdr_scale(float scale); +#endif // STBI_NO_LINEAR + +// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename); +STBIDEF int stbi_is_hdr_from_file(FILE *f); +#endif // STBI_NO_STDIO + + +// get a VERY brief reason for failure +// on most compilers (and ALL modern mainstream compilers) this is threadsafe +STBIDEF const char *stbi_failure_reason (void); + +// free the loaded image -- this is just free() +STBIDEF void stbi_image_free (void *retval_from_stbi_load); + +// get image dimensions & components without fully decoding +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len); +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user); + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit (char const *filename); +STBIDEF int stbi_is_16_bit_from_file(FILE *f); +#endif + + + +// for image formats that explicitly notate that they have premultiplied alpha, +// we just return the colors as stored in the file. set this flag to force +// unpremultiplication. results are undefined if the unpremultiply overflow. +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); + +// indicate whether we should process iphone images back to canonical format, +// or just pass them through "as-is" +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); + +// flip the image vertically, so the first pixel in the output array is the bottom left +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); + +// as above, but only applies to images loaded on the thread that calls the function +// this function is only available if your compiler supports thread-local variables; +// calling it will fail to link if your compiler doesn't +STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply); +STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert); +STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip); + +// ZLIB client - used by PNG, available for other purposes + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header); +STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + + +#ifdef __cplusplus +} +#endif + +// +// +//// end header file ///////////////////////////////////////////////////// +#endif // STBI_INCLUDE_STB_IMAGE_H + +#ifdef STB_IMAGE_IMPLEMENTATION + +#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || 
defined(STBI_ONLY_BMP) \ + || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \ + || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \ + || defined(STBI_ONLY_ZLIB) + #ifndef STBI_ONLY_JPEG + #define STBI_NO_JPEG + #endif + #ifndef STBI_ONLY_PNG + #define STBI_NO_PNG + #endif + #ifndef STBI_ONLY_BMP + #define STBI_NO_BMP + #endif + #ifndef STBI_ONLY_PSD + #define STBI_NO_PSD + #endif + #ifndef STBI_ONLY_TGA + #define STBI_NO_TGA + #endif + #ifndef STBI_ONLY_GIF + #define STBI_NO_GIF + #endif + #ifndef STBI_ONLY_HDR + #define STBI_NO_HDR + #endif + #ifndef STBI_ONLY_PIC + #define STBI_NO_PIC + #endif + #ifndef STBI_ONLY_PNM + #define STBI_NO_PNM + #endif +#endif + +#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB) +#define STBI_NO_ZLIB +#endif + + +#include +#include // ptrdiff_t on osx +#include +#include +#include + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +#include // ldexp, pow +#endif + +#ifndef STBI_NO_STDIO +#include +#endif + +#ifndef STBI_ASSERT +#include +#define STBI_ASSERT(x) assert(x) +#endif + +#ifdef __cplusplus +#define STBI_EXTERN extern "C" +#else +#define STBI_EXTERN extern +#endif + + +#ifndef _MSC_VER + #ifdef __cplusplus + #define stbi_inline inline + #else + #define stbi_inline + #endif +#else + #define stbi_inline __forceinline +#endif + +#ifndef STBI_NO_THREAD_LOCALS + #if defined(__cplusplus) && __cplusplus >= 201103L + #define STBI_THREAD_LOCAL thread_local + #elif defined(__GNUC__) && __GNUC__ < 5 + #define STBI_THREAD_LOCAL __thread + #elif defined(_MSC_VER) + #define STBI_THREAD_LOCAL __declspec(thread) + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__) + #define STBI_THREAD_LOCAL _Thread_local + #endif + + #ifndef STBI_THREAD_LOCAL + #if defined(__GNUC__) + #define STBI_THREAD_LOCAL __thread + #endif + #endif +#endif + +#ifdef _MSC_VER +typedef unsigned short stbi__uint16; +typedef signed short stbi__int16; +typedef unsigned int stbi__uint32; +typedef signed int stbi__int32; +#else +#include +typedef uint16_t stbi__uint16; +typedef int16_t stbi__int16; +typedef uint32_t stbi__uint32; +typedef int32_t stbi__int32; +#endif + +// should produce compiler error if size is wrong +typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; + +#ifdef _MSC_VER +#define STBI_NOTUSED(v) (void)(v) +#else +#define STBI_NOTUSED(v) (void)sizeof(v) +#endif + +#ifdef _MSC_VER +#define STBI_HAS_LROTL +#endif + +#ifdef STBI_HAS_LROTL + #define stbi_lrot(x,y) _lrotl(x,y) +#else + #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31))) +#endif + +#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED)) +// ok +#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED) +// ok +#else +#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)." 
+#endif
+
+#ifndef STBI_MALLOC
+#define STBI_MALLOC(sz)           malloc(sz)
+#define STBI_REALLOC(p,newsz)     realloc(p,newsz)
+#define STBI_FREE(p)              free(p)
+#endif
+
+#ifndef STBI_REALLOC_SIZED
+#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
+#endif
+
+// x86/x64 detection
+#if defined(__x86_64__) || defined(_M_X64)
+#define STBI__X64_TARGET
+#elif defined(__i386) || defined(_M_IX86)
+#define STBI__X86_TARGET
+#endif
+
+#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
+// gcc doesn't support sse2 intrinsics unless you compile with -msse2,
+// which in turn means it gets to use SSE2 everywhere. This is unfortunate,
+// but previous attempts to provide the SSE2 functions with runtime
+// detection caused numerous issues. The way architecture extensions are
+// exposed in GCC/Clang is, sadly, not really suited for one-file libs.
+// New behavior: if compiled with -msse2, we use SSE2 without any
+// detection; if not, we don't use it at all.
+#define STBI_NO_SIMD
+#endif
+
+#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
+// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
+//
+// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
+// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
+// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
+// simultaneously enabling "-mstackrealign".
+//
+// See https://github.com/nothings/stb/issues/81 for more information.
+//
+// So default to no SSE2 on 32-bit MinGW. If you've read this far and added
+// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
+#define STBI_NO_SIMD
+#endif
+
+#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
+#define STBI_SSE2
+#include <emmintrin.h>
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1400  // not VC6
+#include <intrin.h> // __cpuid
+static int stbi__cpuid3(void)
+{
+   int info[4];
+   __cpuid(info,1);
+   return info[3];
+}
+#else
+static int stbi__cpuid3(void)
+{
+   int res;
+   __asm {
+      mov  eax,1
+      cpuid
+      mov  res,edx
+   }
+   return res;
+}
+#endif
+
+#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
+
+#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
+static int stbi__sse2_available(void)
+{
+   int info3 = stbi__cpuid3();
+   return ((info3 >> 26) & 1) != 0;
+}
+#endif
+
+#else // assume GCC-style if not VC++
+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+
+#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
+static int stbi__sse2_available(void)
+{
+   // If we're even attempting to compile this on GCC/Clang, that means
+   // -msse2 is on, which means the compiler is allowed to use SSE2
+   // instructions at will, and so are we.
+   return 1;
+}
+#endif
+
+#endif
+#endif
+
+// ARM NEON
+#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
+#undef STBI_NEON
+#endif
+
+#ifdef STBI_NEON
+#include <arm_neon.h>
+#ifdef _MSC_VER
+#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
+#else
+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+#endif
+#endif
+
+#ifndef STBI_SIMD_ALIGN
+#define STBI_SIMD_ALIGN(type, name) type name
+#endif
+
+#ifndef STBI_MAX_DIMENSIONS
+#define STBI_MAX_DIMENSIONS (1 << 24)
+#endif
+
+///////////////////////////////////////////////
+//
+//  stbi__context struct and start_xxx functions
+
+// stbi__context structure is our basic context used by all images, so it
+// contains all the IO context, plus some basic image information
+typedef struct
+{
+   stbi__uint32 img_x, img_y;
+   int img_n, img_out_n;
+
+   stbi_io_callbacks io;
+   void *io_user_data;
+
+   int read_from_callbacks;
+   int buflen;
+   stbi_uc buffer_start[128];
+   int callback_already_read;
+
+   stbi_uc *img_buffer, *img_buffer_end;
+   stbi_uc *img_buffer_original, *img_buffer_original_end;
+} stbi__context;
+
+
+static void stbi__refill_buffer(stbi__context *s);
+
+// initialize a memory-decode context
+static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
+{
+   s->io.read = NULL;
+   s->read_from_callbacks = 0;
+   s->callback_already_read = 0;
+   s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
+   s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
+}
+
+// initialize a callback-based context
+static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
+{
+   s->io = *c;
+   s->io_user_data = user;
+   s->buflen = sizeof(s->buffer_start);
+   s->read_from_callbacks = 1;
+   s->callback_already_read = 0;
+   s->img_buffer = s->img_buffer_original = s->buffer_start;
+   stbi__refill_buffer(s);
+   s->img_buffer_original_end = s->img_buffer_end;
+}
+
+#ifndef STBI_NO_STDIO
+
+static int stbi__stdio_read(void *user, char *data, int size)
+{
+   return (int) fread(data,1,size,(FILE*) user);
+}
+
+static void stbi__stdio_skip(void *user, int n)
+{
+   int ch;
+   fseek((FILE*) user, n, SEEK_CUR);
+   ch = fgetc((FILE*) user);  /* have to read a byte to reset feof()'s flag */
+   if (ch != EOF) {
+      ungetc(ch, (FILE *) user);  /* push byte back onto stream if valid.
*/ + } +} + +static int stbi__stdio_eof(void *user) +{ + return feof((FILE*) user) || ferror((FILE *) user); +} + +static stbi_io_callbacks stbi__stdio_callbacks = +{ + stbi__stdio_read, + stbi__stdio_skip, + stbi__stdio_eof, +}; + +static void stbi__start_file(stbi__context *s, FILE *f) +{ + stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f); +} + +//static void stop_file(stbi__context *s) { } + +#endif // !STBI_NO_STDIO + +static void stbi__rewind(stbi__context *s) +{ + // conceptually rewind SHOULD rewind to the beginning of the stream, + // but we just rewind to the beginning of the initial buffer, because + // we only use it after doing 'test', which only ever looks at at most 92 bytes + s->img_buffer = s->img_buffer_original; + s->img_buffer_end = s->img_buffer_original_end; +} + +enum +{ + STBI_ORDER_RGB, + STBI_ORDER_BGR +}; + +typedef struct +{ + int bits_per_channel; + int num_channels; + int channel_order; +} stbi__result_info; + +#ifndef STBI_NO_JPEG +static int stbi__jpeg_test(stbi__context *s); +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNG +static int stbi__png_test(stbi__context *s); +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__png_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_BMP +static int stbi__bmp_test(stbi__context *s); +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_TGA +static int stbi__tga_test(stbi__context *s); +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s); +static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc); +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__psd_is16(stbi__context *s); +#endif + +#ifndef STBI_NO_HDR +static int stbi__hdr_test(stbi__context *s); +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_test(stbi__context *s); +static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_GIF +static int stbi__gif_test(stbi__context *s); +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp); +#endif + +#ifndef STBI_NO_PNM +static int stbi__pnm_test(stbi__context *s); +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp); +static int 
stbi__pnm_is16(stbi__context *s); +#endif + +static +#ifdef STBI_THREAD_LOCAL +STBI_THREAD_LOCAL +#endif +const char *stbi__g_failure_reason; + +STBIDEF const char *stbi_failure_reason(void) +{ + return stbi__g_failure_reason; +} + +#ifndef STBI_NO_FAILURE_STRINGS +static int stbi__err(const char *str) +{ + stbi__g_failure_reason = str; + return 0; +} +#endif + +static void *stbi__malloc(size_t size) +{ + return STBI_MALLOC(size); +} + +// stb_image uses ints pervasively, including for offset calculations. +// therefore the largest decoded image size we can support with the +// current code, even on 64-bit targets, is INT_MAX. this is not a +// significant limitation for the intended use case. +// +// we do, however, need to make sure our size calculations don't +// overflow. hence a few helper functions for size calculations that +// multiply integers together, making sure that they're non-negative +// and no overflow occurs. + +// return 1 if the sum is valid, 0 on overflow. +// negative terms are considered invalid. +static int stbi__addsizes_valid(int a, int b) +{ + if (b < 0) return 0; + // now 0 <= b <= INT_MAX, hence also + // 0 <= INT_MAX - b <= INTMAX. + // And "a + b <= INT_MAX" (which might overflow) is the + // same as a <= INT_MAX - b (no overflow) + return a <= INT_MAX - b; +} + +// returns 1 if the product is valid, 0 on overflow. +// negative factors are considered invalid. +static int stbi__mul2sizes_valid(int a, int b) +{ + if (a < 0 || b < 0) return 0; + if (b == 0) return 1; // mul-by-0 is always safe + // portable way to check for no overflows in a*b + return a <= INT_MAX/b; +} + +#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR) +// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow +static int stbi__mad2sizes_valid(int a, int b, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add); +} +#endif + +// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow +static int stbi__mad3sizes_valid(int a, int b, int c, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__addsizes_valid(a*b*c, add); +} + +// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add); +} +#endif + +#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR) +// mallocs with size overflow checking +static void *stbi__malloc_mad2(int a, int b, int add) +{ + if (!stbi__mad2sizes_valid(a, b, add)) return NULL; + return stbi__malloc(a*b + add); +} +#endif + +static void *stbi__malloc_mad3(int a, int b, int c, int add) +{ + if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL; + return stbi__malloc(a*b*c + add); +} + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) +static void *stbi__malloc_mad4(int a, int b, int c, int d, int add) +{ + if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL; + return stbi__malloc(a*b*c*d + add); +} +#endif + +// stbi__err - error +// stbi__errpf - error returning pointer to float +// stbi__errpuc - error returning pointer to unsigned char + +#ifdef STBI_NO_FAILURE_STRINGS + #define stbi__err(x,y) 0 +#elif defined(STBI_FAILURE_USERMSG) + #define stbi__err(x,y) 
stbi__err(y) +#else + #define stbi__err(x,y) stbi__err(x) +#endif + +#define stbi__errpf(x,y) ((float *)(size_t) (stbi__err(x,y)?NULL:NULL)) +#define stbi__errpuc(x,y) ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL)) + +STBIDEF void stbi_image_free(void *retval_from_stbi_load) +{ + STBI_FREE(retval_from_stbi_load); +} + +#ifndef STBI_NO_LINEAR +static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp); +#endif + +#ifndef STBI_NO_HDR +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp); +#endif + +static int stbi__vertically_flip_on_load_global = 0; + +STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) +{ + stbi__vertically_flip_on_load_global = flag_true_if_should_flip; +} + +#ifndef STBI_THREAD_LOCAL +#define stbi__vertically_flip_on_load stbi__vertically_flip_on_load_global +#else +static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set; + +STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip) +{ + stbi__vertically_flip_on_load_local = flag_true_if_should_flip; + stbi__vertically_flip_on_load_set = 1; +} + +#define stbi__vertically_flip_on_load (stbi__vertically_flip_on_load_set \ + ? stbi__vertically_flip_on_load_local \ + : stbi__vertically_flip_on_load_global) +#endif // STBI_THREAD_LOCAL + +static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ + memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields + ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed + ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order + ri->num_channels = 0; + + // test the formats with a very explicit header first (at least a FOURCC + // or distinctive magic number first) + #ifndef STBI_NO_PNG + if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_BMP + if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_GIF + if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PSD + if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc); + #else + STBI_NOTUSED(bpc); + #endif + #ifndef STBI_NO_PIC + if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri); + #endif + + // then the formats that can end up attempting to load with just 1 or 2 + // bytes matching expectations; these are prone to false positives, so + // try them later + #ifndef STBI_NO_JPEG + if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri); + #endif + #ifndef STBI_NO_PNM + if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri); + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_test(s)) { + float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri); + return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + + #ifndef STBI_NO_TGA + // test tga last because it's a crappy test! 
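+   // (TGA files carry no leading magic number, so stbi__tga_test can only
+   // sanity-check a few header fields and may pass on non-TGA data -- which
+   // is why it runs after every other probe.)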
+ if (stbi__tga_test(s)) + return stbi__tga_load(s,x,y,comp,req_comp, ri); + #endif + + return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt"); +} + +static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi_uc *reduced; + + reduced = (stbi_uc *) stbi__malloc(img_len); + if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling + + STBI_FREE(orig); + return reduced; +} + +static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi__uint16 *enlarged; + + enlarged = (stbi__uint16 *) stbi__malloc(img_len*2); + if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff + + STBI_FREE(orig); + return enlarged; +} + +static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel) +{ + int row; + size_t bytes_per_row = (size_t)w * bytes_per_pixel; + stbi_uc temp[2048]; + stbi_uc *bytes = (stbi_uc *)image; + + for (row = 0; row < (h>>1); row++) { + stbi_uc *row0 = bytes + row*bytes_per_row; + stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; + // swap row0 with row1 + size_t bytes_left = bytes_per_row; + while (bytes_left) { + size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp); + memcpy(temp, row0, bytes_copy); + memcpy(row0, row1, bytes_copy); + memcpy(row1, temp, bytes_copy); + row0 += bytes_copy; + row1 += bytes_copy; + bytes_left -= bytes_copy; + } + } +} + +#ifndef STBI_NO_GIF +static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel) +{ + int slice; + int slice_size = w * h * bytes_per_pixel; + + stbi_uc *bytes = (stbi_uc *)image; + for (slice = 0; slice < z; ++slice) { + stbi__vertical_flip(bytes, w, h, bytes_per_pixel); + bytes += slice_size; + } +} +#endif + +static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); + + if (result == NULL) + return NULL; + + // it is the responsibility of the loaders to make sure we get either 8 or 16 bit. + STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); + + if (ri.bits_per_channel != 8) { + result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 8; + } + + // @TODO: move stbi__convert_format to here + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc)); + } + + return (unsigned char *) result; +} + +static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); + + if (result == NULL) + return NULL; + + // it is the responsibility of the loaders to make sure we get either 8 or 16 bit. + STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); + + if (ri.bits_per_channel != 16) { + result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? 
*comp : req_comp); + ri.bits_per_channel = 16; + } + + // @TODO: move stbi__convert_format16 to here + // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16)); + } + + return (stbi__uint16 *) result; +} + +#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR) +static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp) +{ + if (stbi__vertically_flip_on_load && result != NULL) { + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(float)); + } +} +#endif + +#ifndef STBI_NO_STDIO + +#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) +STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide); +STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default); +#endif + +#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) +STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input) +{ + return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL); +} +#endif + +static FILE *stbi__fopen(char const *filename, char const *mode) +{ + FILE *f; +#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) + wchar_t wMode[64]; + wchar_t wFilename[1024]; + if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename))) + return 0; + + if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode))) + return 0; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 + if (0 != _wfopen_s(&f, wFilename, wMode)) + f = 0; +#else + f = _wfopen(wFilename, wMode); +#endif + +#elif defined(_MSC_VER) && _MSC_VER >= 1400 + if (0 != fopen_s(&f, filename, mode)) + f=0; +#else + f = fopen(filename, mode); +#endif + return f; +} + + +STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + unsigned char *result; + if (!f) return stbi__errpuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi__uint16 *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + stbi__uint16 *result; + if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file"); + result = 
stbi_load_from_file_16(f,x,y,comp,req_comp); + fclose(f); + return result; +} + + +#endif //!STBI_NO_STDIO + +STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_mem(&s,buffer,len); + + result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp); + if (stbi__vertically_flip_on_load) { + stbi__vertical_flip_slices( result, *x, *y, *z, *comp ); + } + + return result; +} +#endif + +#ifndef STBI_NO_LINEAR +static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + #ifndef STBI_NO_HDR + if (stbi__hdr_test(s)) { + stbi__result_info ri; + float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri); + if (hdr_data) + stbi__float_postprocess(hdr_data,x,y,comp,req_comp); + return hdr_data; + } + #endif + data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp); + if (data) + return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); + return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); +} + +STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + float *result; + FILE *f = stbi__fopen(filename, "rb"); + if (!f) return stbi__errpf("can't fopen", "Unable to open file"); + result = stbi_loadf_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi__context s; + stbi__start_file(&s,f); + return stbi__loadf_main(&s,x,y,comp,req_comp); +} +#endif // !STBI_NO_STDIO + +#endif // !STBI_NO_LINEAR + +// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is +// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always +// reports false! 
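+//
+// a typical caller, sketched (w/h/n and "filename" are illustrative):
+//    if (stbi_is_hdr(filename)) {
+//       float *pix = stbi_loadf(filename, &w, &h, &n, 0);        // linear float path
+//       /* ... use pix ... */ stbi_image_free(pix);
+//    } else {
+//       unsigned char *pix = stbi_load(filename, &w, &h, &n, 0); // 8-bit LDR path
+//       /* ... use pix ... */ stbi_image_free(pix);
+//    }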
+ +STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) +{ + #ifndef STBI_NO_HDR + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__hdr_test(&s); + #else + STBI_NOTUSED(buffer); + STBI_NOTUSED(len); + return 0; + #endif +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_is_hdr (char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result=0; + if (f) { + result = stbi_is_hdr_from_file(f); + fclose(f); + } + return result; +} + +STBIDEF int stbi_is_hdr_from_file(FILE *f) +{ + #ifndef STBI_NO_HDR + long pos = ftell(f); + int res; + stbi__context s; + stbi__start_file(&s,f); + res = stbi__hdr_test(&s); + fseek(f, pos, SEEK_SET); + return res; + #else + STBI_NOTUSED(f); + return 0; + #endif +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user) +{ + #ifndef STBI_NO_HDR + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); + return stbi__hdr_test(&s); + #else + STBI_NOTUSED(clbk); + STBI_NOTUSED(user); + return 0; + #endif +} + +#ifndef STBI_NO_LINEAR +static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f; + +STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; } +STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; } +#endif + +static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f; + +STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; } +STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; } + + +////////////////////////////////////////////////////////////////////////////// +// +// Common code used by all image loaders +// + +enum +{ + STBI__SCAN_load=0, + STBI__SCAN_type, + STBI__SCAN_header +}; + +static void stbi__refill_buffer(stbi__context *s) +{ + int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen); + s->callback_already_read += (int) (s->img_buffer - s->img_buffer_original); + if (n == 0) { + // at end of file, treat same as if from memory, but need to handle case + // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file + s->read_from_callbacks = 0; + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start+1; + *s->img_buffer = 0; + } else { + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start + n; + } +} + +stbi_inline static stbi_uc stbi__get8(stbi__context *s) +{ + if (s->img_buffer < s->img_buffer_end) + return *s->img_buffer++; + if (s->read_from_callbacks) { + stbi__refill_buffer(s); + return *s->img_buffer++; + } + return 0; +} + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else +stbi_inline static int stbi__at_eof(stbi__context *s) +{ + if (s->io.read) { + if (!(s->io.eof)(s->io_user_data)) return 0; + // if feof() is true, check if buffer = end + // special case: we've only got the special 0 character at the end + if (s->read_from_callbacks == 0) return 1; + } + + return s->img_buffer >= s->img_buffer_end; +} +#endif + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) +// nothing +#else +static void stbi__skip(stbi__context *s, int n) +{ + if (n == 0) return; // already there! 
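+   // (a negative n only arises from corrupt or overflowed size fields; the
+   // branch below pins the buffer to its end so later reads see EOF instead
+   // of seeking backwards)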
+ if (n < 0) { + s->img_buffer = s->img_buffer_end; + return; + } + if (s->io.read) { + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + s->img_buffer = s->img_buffer_end; + (s->io.skip)(s->io_user_data, n - blen); + return; + } + } + s->img_buffer += n; +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM) +// nothing +#else +static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) +{ + if (s->io.read) { + int blen = (int) (s->img_buffer_end - s->img_buffer); + if (blen < n) { + int res, count; + + memcpy(buffer, s->img_buffer, blen); + + count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen); + res = (count == (n-blen)); + s->img_buffer = s->img_buffer_end; + return res; + } + } + + if (s->img_buffer+n <= s->img_buffer_end) { + memcpy(buffer, s->img_buffer, n); + s->img_buffer += n; + return 1; + } else + return 0; +} +#endif + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC) +// nothing +#else +static int stbi__get16be(stbi__context *s) +{ + int z = stbi__get8(s); + return (z << 8) + stbi__get8(s); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC) +// nothing +#else +static stbi__uint32 stbi__get32be(stbi__context *s) +{ + stbi__uint32 z = stbi__get16be(s); + return (z << 16) + stbi__get16be(s); +} +#endif + +#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) +// nothing +#else +static int stbi__get16le(stbi__context *s) +{ + int z = stbi__get8(s); + return z + (stbi__get8(s) << 8); +} +#endif + +#ifndef STBI_NO_BMP +static stbi__uint32 stbi__get32le(stbi__context *s) +{ + stbi__uint32 z = stbi__get16le(s); + z += (stbi__uint32)stbi__get16le(s) << 16; + return z; +} +#endif + +#define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings + +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else +////////////////////////////////////////////////////////////////////////////// +// +// generic converter from built-in img_n to req_comp +// individual types do this automatically as much as possible (e.g. jpeg +// does all cases internally since it needs to colorspace convert anyway, +// and it never has alpha, so very few cases ). 
png can automatically +// interleave an alpha=255 channel, but falls back to this for other cases +// +// assume data buffer is malloced, so malloc a new one and free that one +// only failure mode is malloc failing + +static stbi_uc stbi__compute_y(int r, int g, int b) +{ + return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else +static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + unsigned char *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0); + if (good == NULL) { + STBI_FREE(data); + return stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + unsigned char *src = data + j * x * img_n ; + unsigned char *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, req_comp)) { + STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break; + default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return stbi__errpuc("unsupported", "Unsupported format conversion"); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) +// nothing +#else +static stbi__uint16 stbi__compute_y_16(int r, int g, int b) +{ + return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) +// nothing +#else +static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + stbi__uint16 *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); + if (good == NULL) { + STBI_FREE(data); + return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + stbi__uint16 *src = data + j * x * img_n ; + stbi__uint16 *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, 
so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, req_comp)) { + STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break; + default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return (stbi__uint16*) stbi__errpuc("unsupported", "Unsupported format conversion"); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} +#endif + +#ifndef STBI_NO_LINEAR +static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) +{ + int i,k,n; + float *output; + if (!data) return NULL; + output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0); + if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale); + } + } + if (n < comp) { + for (i=0; i < x*y; ++i) { + output[i*comp + n] = data[i*comp + n]/255.0f; + } + } + STBI_FREE(data); + return output; +} +#endif + +#ifndef STBI_NO_HDR +#define stbi__float2int(x) ((int) (x)) +static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp) +{ + int i,k,n; + stbi_uc *output; + if (!data) return NULL; + output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0); + if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + if (k < comp) { + float z = data[i*comp+k] * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (stbi_uc) stbi__float2int(z); + } + } + STBI_FREE(data); + return output; +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// "baseline" JPEG/JFIF decoder +// +// simple implementation +// - doesn't support delayed output of y-dimension +// - simple interface (only one output format: 8-bit interleaved RGB) +// - doesn't try to recover corrupt jpegs +// - doesn't allow partial loading, loading multiple at once +// - still fast on x86 (copying globals into locals doesn't help x86) +// - allocates lots of intermediate memory (full size of all components) +// - non-interleaved case requires this anyway +// - allows good upsampling (see next) +// high-quality +// - upsampled channels are bilinearly interpolated, even across blocks +// - quality integer IDCT derived from IJG's 
'slow' +// performance +// - fast huffman; reasonable integer IDCT +// - some SIMD kernels for common paths on targets with SSE2/NEON +// - uses a lot of intermediate memory, could cache poorly + +#ifndef STBI_NO_JPEG + +// huffman decoding acceleration +#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache + +typedef struct +{ + stbi_uc fast[1 << FAST_BITS]; + // weirdly, repacking this into AoS is a 10% speed loss, instead of a win + stbi__uint16 code[256]; + stbi_uc values[256]; + stbi_uc size[257]; + unsigned int maxcode[18]; + int delta[17]; // old 'firstsymbol' - old 'firstcode' +} stbi__huffman; + +typedef struct +{ + stbi__context *s; + stbi__huffman huff_dc[4]; + stbi__huffman huff_ac[4]; + stbi__uint16 dequant[4][64]; + stbi__int16 fast_ac[4][1 << FAST_BITS]; + +// sizes for components, interleaved MCUs + int img_h_max, img_v_max; + int img_mcu_x, img_mcu_y; + int img_mcu_w, img_mcu_h; + +// definition of jpeg image component + struct + { + int id; + int h,v; + int tq; + int hd,ha; + int dc_pred; + + int x,y,w2,h2; + stbi_uc *data; + void *raw_data, *raw_coeff; + stbi_uc *linebuf; + short *coeff; // progressive only + int coeff_w, coeff_h; // number of 8x8 coefficient blocks + } img_comp[4]; + + stbi__uint32 code_buffer; // jpeg entropy-coded buffer + int code_bits; // number of valid bits + unsigned char marker; // marker seen while filling entropy buffer + int nomore; // flag if we saw a marker so must stop + + int progressive; + int spec_start; + int spec_end; + int succ_high; + int succ_low; + int eob_run; + int jfif; + int app14_color_transform; // Adobe APP14 tag + int rgb; + + int scan_n, order[4]; + int restart_interval, todo; + +// kernels + void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]); + void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step); + stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs); +} stbi__jpeg; + +static int stbi__build_huffman(stbi__huffman *h, int *count) +{ + int i,j,k=0; + unsigned int code; + // build size list for each symbol (from JPEG spec) + for (i=0; i < 16; ++i) + for (j=0; j < count[i]; ++j) + h->size[k++] = (stbi_uc) (i+1); + h->size[k] = 0; + + // compute actual symbols (from jpeg spec) + code = 0; + k = 0; + for(j=1; j <= 16; ++j) { + // compute delta to add to code to compute symbol id + h->delta[j] = k - code; + if (h->size[k] == j) { + while (h->size[k] == j) + h->code[k++] = (stbi__uint16) (code++); + if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG"); + } + // compute largest code + 1 for this size, preshifted as needed later + h->maxcode[j] = code << (16-j); + code <<= 1; + } + h->maxcode[j] = 0xffffffff; + + // build non-spec acceleration table; 255 is flag for not-accelerated + memset(h->fast, 255, 1 << FAST_BITS); + for (i=0; i < k; ++i) { + int s = h->size[i]; + if (s <= FAST_BITS) { + int c = h->code[i] << (FAST_BITS-s); + int m = 1 << (FAST_BITS-s); + for (j=0; j < m; ++j) { + h->fast[c+j] = (stbi_uc) i; + } + } + } + return 1; +} + +// build a table that decodes both magnitude and value of small ACs in +// one go. 
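+// each table entry packs value*256 + run*16 + (code length + magnitude bits),
+// so a consumer splits an entry r apart again with shifts and masks, e.g.:
+//    run = (r >> 4) & 15;  combined_bits = r & 15;  value = r >> 8;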
+static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h) +{ + int i; + for (i=0; i < (1 << FAST_BITS); ++i) { + stbi_uc fast = h->fast[i]; + fast_ac[i] = 0; + if (fast < 255) { + int rs = h->values[fast]; + int run = (rs >> 4) & 15; + int magbits = rs & 15; + int len = h->size[fast]; + + if (magbits && len + magbits <= FAST_BITS) { + // magnitude code followed by receive_extend code + int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits); + int m = 1 << (magbits - 1); + if (k < m) k += (~0U << magbits) + 1; + // if the result is small enough, we can fit it in fast_ac table + if (k >= -128 && k <= 127) + fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits)); + } + } + } +} + +static void stbi__grow_buffer_unsafe(stbi__jpeg *j) +{ + do { + unsigned int b = j->nomore ? 0 : stbi__get8(j->s); + if (b == 0xff) { + int c = stbi__get8(j->s); + while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes + if (c != 0) { + j->marker = (unsigned char) c; + j->nomore = 1; + return; + } + } + j->code_buffer |= b << (24 - j->code_bits); + j->code_bits += 8; + } while (j->code_bits <= 24); +} + +// (1 << n) - 1 +static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; + +// decode a jpeg huffman value from the bitstream +stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) +{ + unsigned int temp; + int c,k; + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + // look at the top FAST_BITS and determine what symbol ID it is, + // if the code is <= FAST_BITS + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + k = h->fast[c]; + if (k < 255) { + int s = h->size[k]; + if (s > j->code_bits) + return -1; + j->code_buffer <<= s; + j->code_bits -= s; + return h->values[k]; + } + + // naive test is to shift the code_buffer down so k bits are + // valid, then test against maxcode. To speed this up, we've + // preshifted maxcode left so that it has (16-k) 0s at the + // end; in other words, regardless of the number of bits, it + // wants to be compared against something shifted to have 16; + // that way we don't need to shift inside the loop. + temp = j->code_buffer >> 16; + for (k=FAST_BITS+1 ; ; ++k) + if (temp < h->maxcode[k]) + break; + if (k == 17) { + // error! 
code not found
+      j->code_bits -= 16;
+      return -1;
+   }
+
+   if (k > j->code_bits)
+      return -1;
+
+   // convert the huffman code to the symbol id
+   c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
+   STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
+
+   // convert the id to a symbol
+   j->code_bits -= k;
+   j->code_buffer <<= k;
+   return h->values[c];
+}
+
+// bias[n] = (-1<<n) + 1
+static const int stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
+
+// combined JPEG 'receive' and JPEG 'extend', since baseline
+// always extends everything it receives.
+stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
+{
+   unsigned int k;
+   int sgn;
+   if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
+
+   sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative)
+   k = stbi_lrot(j->code_buffer, n);
+   j->code_buffer = k & ~stbi__bmask[n];
+   k &= stbi__bmask[n];
+   j->code_bits -= n;
+   return k + (stbi__jbias[n] & (sgn - 1));
+}
+
+// get some unsigned bits
+stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
+{
+   unsigned int k;
+   if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
+   k = stbi_lrot(j->code_buffer, n);
+   j->code_buffer = k & ~stbi__bmask[n];
+   k &= stbi__bmask[n];
+   j->code_bits -= n;
+   return k;
+}
+
+stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
+{
+   unsigned int k;
+   if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
+   k = j->code_buffer;
+   j->code_buffer <<= 1;
+   --j->code_bits;
+   return k & 0x80000000;
+}
+
+// given a value that's at position X in the zigzag stream,
+// where does it appear in the 8x8 matrix coded as row-major?
+static const stbi_uc stbi__jpeg_dezigzag[64+15] =
+{
+    0,  1,  8, 16,  9,  2,  3, 10,
+   17, 24, 32, 25, 18, 11,  4,  5,
+   12, 19, 26, 33, 40, 48, 41, 34,
+   27, 20, 13,  6,  7, 14, 21, 28,
+   35, 42, 49, 56, 57, 50, 43, 36,
+   29, 22, 15, 23, 30, 37, 44, 51,
+   58, 59, 52, 45, 38, 31, 39, 46,
+   53, 60, 61, 54, 47, 55, 62, 63,
+   // let corrupt input sample past end
+   63, 63, 63, 63, 63, 63, 63, 63,
+   63, 63, 63, 63, 63, 63, 63
+};
+
+// decode one 64-entry block--
+static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
+{
+   int diff,dc,k;
+   int t;
+
+   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+   t = stbi__jpeg_huff_decode(j, hdc);
+   if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
+
+   // 0 all the ac values now so we can do it 32-bits at a time
+   memset(data,0,64*sizeof(data[0]));
+
+   diff = t ?
stbi__extend_receive(j, t) : 0; + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) (dc * dequant[0]); + + // decode AC components, see JPEG spec + k = 1; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + j->code_buffer <<= s; + j->code_bits -= s; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) * dequant[zig]); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (rs != 0xf0) break; // end block + k += 16; + } else { + k += r; + // decode into unzigzag'd location + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]); + } + } + } while (k < 64); + return 1; +} + +static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b) +{ + int diff,dc; + int t; + if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + + if (j->succ_high == 0) { + // first scan for DC coefficient, must be first + memset(data,0,64*sizeof(data[0])); // 0 all the ac values now + t = stbi__jpeg_huff_decode(j, hdc); + if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + diff = t ? stbi__extend_receive(j, t) : 0; + + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) (dc * (1 << j->succ_low)); + } else { + // refinement scan for DC coefficient + if (stbi__jpeg_get_bit(j)) + data[0] += (short) (1 << j->succ_low); + } + return 1; +} + +// @OPTIMIZE: store non-zigzagged during the decode passes, +// and only de-zigzag when dequantizing +static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac) +{ + int k; + if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + + if (j->succ_high == 0) { + int shift = j->succ_low; + + if (j->eob_run) { + --j->eob_run; + return 1; + } + + k = j->spec_start; + do { + unsigned int zig; + int c,r,s; + if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + r = fac[c]; + if (r) { // fast-AC path + k += (r >> 4) & 15; // run + s = r & 15; // combined length + j->code_buffer <<= s; + j->code_bits -= s; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) ((r >> 8) * (1 << shift)); + } else { + int rs = stbi__jpeg_huff_decode(j, hac); + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r); + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + --j->eob_run; + break; + } + k += 16; + } else { + k += r; + zig = stbi__jpeg_dezigzag[k++]; + data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift)); + } + } + } while (k <= j->spec_end); + } else { + // refinement scan for these AC coefficients + + short bit = (short) (1 << j->succ_low); + + if (j->eob_run) { + --j->eob_run; + for (k = j->spec_start; k <= j->spec_end; ++k) { + short *p = &data[stbi__jpeg_dezigzag[k]]; + if (*p != 0) + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } + } else { + k = 
j->spec_start; + do { + int r,s; + int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh + if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (r < 15) { + j->eob_run = (1 << r) - 1; + if (r) + j->eob_run += stbi__jpeg_get_bits(j, r); + r = 64; // force end of block + } else { + // r=15 s=0 should write 16 0s, so we just do + // a run of 15 0s and then write s (which is 0), + // so we don't have to do anything special here + } + } else { + if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG"); + // sign bit + if (stbi__jpeg_get_bit(j)) + s = bit; + else + s = -bit; + } + + // advance by r + while (k <= j->spec_end) { + short *p = &data[stbi__jpeg_dezigzag[k++]]; + if (*p != 0) { + if (stbi__jpeg_get_bit(j)) + if ((*p & bit)==0) { + if (*p > 0) + *p += bit; + else + *p -= bit; + } + } else { + if (r == 0) { + *p = (short) s; + break; + } + --r; + } + } + } while (k <= j->spec_end); + } + } + return 1; +} + +// take a -128..127 value and stbi__clamp it and convert to 0..255 +stbi_inline static stbi_uc stbi__clamp(int x) +{ + // trick to use a single test to catch both cases + if ((unsigned int) x > 255) { + if (x < 0) return 0; + if (x > 255) return 255; + } + return (stbi_uc) x; +} + +#define stbi__f2f(x) ((int) (((x) * 4096 + 0.5))) +#define stbi__fsh(x) ((x) * 4096) + +// derived from jidctint -- DCT_ISLOW +#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ + int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ + p2 = s2; \ + p3 = s6; \ + p1 = (p2+p3) * stbi__f2f(0.5411961f); \ + t2 = p1 + p3*stbi__f2f(-1.847759065f); \ + t3 = p1 + p2*stbi__f2f( 0.765366865f); \ + p2 = s0; \ + p3 = s4; \ + t0 = stbi__fsh(p2+p3); \ + t1 = stbi__fsh(p2-p3); \ + x0 = t0+t3; \ + x3 = t0-t3; \ + x1 = t1+t2; \ + x2 = t1-t2; \ + t0 = s7; \ + t1 = s5; \ + t2 = s3; \ + t3 = s1; \ + p3 = t0+t2; \ + p4 = t1+t3; \ + p1 = t0+t3; \ + p2 = t1+t2; \ + p5 = (p3+p4)*stbi__f2f( 1.175875602f); \ + t0 = t0*stbi__f2f( 0.298631336f); \ + t1 = t1*stbi__f2f( 2.053119869f); \ + t2 = t2*stbi__f2f( 3.072711026f); \ + t3 = t3*stbi__f2f( 1.501321110f); \ + p1 = p5 + p1*stbi__f2f(-0.899976223f); \ + p2 = p5 + p2*stbi__f2f(-2.562915447f); \ + p3 = p3*stbi__f2f(-1.961570560f); \ + p4 = p4*stbi__f2f(-0.390180644f); \ + t3 += p1+p4; \ + t2 += p2+p3; \ + t1 += p2+p4; \ + t0 += p1+p3; + +static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) +{ + int i,val[64],*v=val; + stbi_uc *o; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0]*4; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out 
+ STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. + // so we want to round that, which means adding 0.5 * 1<<17, + // aka 65536. Also, we'll end up with -128 to 127 that we want + // to encode as 0..255 by adding 128, so we'll add that before the shift + x0 += 65536 + (128<<17); + x1 += 65536 + (128<<17); + x2 += 65536 + (128<<17); + x3 += 65536 + (128<<17); + // tried computing the shifts into temps, or'ing the temps to see + // if any were out of range, but that was slower + o[0] = stbi__clamp((x0+t3) >> 17); + o[7] = stbi__clamp((x0-t3) >> 17); + o[1] = stbi__clamp((x1+t2) >> 17); + o[6] = stbi__clamp((x1-t2) >> 17); + o[2] = stbi__clamp((x2+t1) >> 17); + o[5] = stbi__clamp((x2-t1) >> 17); + o[3] = stbi__clamp((x3+t0) >> 17); + o[4] = stbi__clamp((x3-t0) >> 17); + } +} + +#ifdef STBI_SSE2 +// sse2 integer IDCT. not the fastest possible implementation but it +// produces bit-identical results to the generic C version so it's +// fully "transparent". +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + // This is constructed to match our regular (generic) integer IDCT exactly. + __m128i row0, row1, row2, row3, row4, row5, row6, row7; + __m128i tmp; + + // dot product constant: even elems=x, odd elems=y + #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y)) + + // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit) + // out(1) = c1[even]*x + c1[odd]*y + #define dct_rot(out0,out1, x,y,c0,c1) \ + __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \ + __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \ + __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \ + __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \ + __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \ + __m128i out1##_h = _mm_madd_epi16(c0##hi, c1) + + // out = in << 12 (in 16-bit, out 32-bit) + #define dct_widen(out, in) \ + __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \ + __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4) + + // wide add + #define dct_wadd(out, a, b) \ + __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_add_epi32(a##_h, b##_h) + + // wide sub + #define dct_wsub(out, a, b) \ + __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \ + __m128i out##_h = _mm_sub_epi32(a##_h, b##_h) + + // butterfly a/b, add bias, then shift by "s" and pack + #define dct_bfly32o(out0, out1, a,b,bias,s) \ + { \ + __m128i abiased_l = _mm_add_epi32(a##_l, bias); \ + __m128i abiased_h = _mm_add_epi32(a##_h, bias); \ + dct_wadd(sum, abiased, b); \ + dct_wsub(dif, abiased, b); \ + out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \ + out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \ + } + + // 8-bit interleave step (for transposes) + #define dct_interleave8(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi8(a, b); \ + b = _mm_unpackhi_epi8(tmp, b) + + // 16-bit interleave step (for transposes) + #define dct_interleave16(a, b) \ + tmp = a; \ + a = _mm_unpacklo_epi16(a, b); \ + b = _mm_unpackhi_epi16(tmp, b) + + #define dct_pass(bias,shift) \ + { \ + /* even part */ \ + dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \ + __m128i sum04 = _mm_add_epi16(row0, row4); \ + __m128i dif04 = _mm_sub_epi16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + 
dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \ + dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \ + __m128i sum17 = _mm_add_epi16(row1, row7); \ + __m128i sum35 = _mm_add_epi16(row3, row5); \ + dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \ + dct_wadd(x4, y0o, y4o); \ + dct_wadd(x5, y1o, y5o); \ + dct_wadd(x6, y2o, y5o); \ + dct_wadd(x7, y3o, y4o); \ + dct_bfly32o(row0,row7, x0,x7,bias,shift); \ + dct_bfly32o(row1,row6, x1,x6,bias,shift); \ + dct_bfly32o(row2,row5, x2,x5,bias,shift); \ + dct_bfly32o(row3,row4, x3,x4,bias,shift); \ + } + + __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f)); + __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f)); + __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f)); + __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f)); + __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f)); + __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f)); + __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f)); + __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f)); + + // rounding biases in column/row passes, see stbi__idct_block for explanation. + __m128i bias_0 = _mm_set1_epi32(512); + __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17)); + + // load + row0 = _mm_load_si128((const __m128i *) (data + 0*8)); + row1 = _mm_load_si128((const __m128i *) (data + 1*8)); + row2 = _mm_load_si128((const __m128i *) (data + 2*8)); + row3 = _mm_load_si128((const __m128i *) (data + 3*8)); + row4 = _mm_load_si128((const __m128i *) (data + 4*8)); + row5 = _mm_load_si128((const __m128i *) (data + 5*8)); + row6 = _mm_load_si128((const __m128i *) (data + 6*8)); + row7 = _mm_load_si128((const __m128i *) (data + 7*8)); + + // column pass + dct_pass(bias_0, 10); + + { + // 16bit 8x8 transpose pass 1 + dct_interleave16(row0, row4); + dct_interleave16(row1, row5); + dct_interleave16(row2, row6); + dct_interleave16(row3, row7); + + // transpose pass 2 + dct_interleave16(row0, row2); + dct_interleave16(row1, row3); + dct_interleave16(row4, row6); + dct_interleave16(row5, row7); + + // transpose pass 3 + dct_interleave16(row0, row1); + dct_interleave16(row2, row3); + dct_interleave16(row4, row5); + dct_interleave16(row6, row7); + } + + // row pass + dct_pass(bias_1, 17); + + { + // pack + __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7 + __m128i p1 = _mm_packus_epi16(row2, row3); + __m128i p2 = _mm_packus_epi16(row4, row5); + __m128i p3 = _mm_packus_epi16(row6, row7); + + // 8bit 8x8 transpose pass 1 + dct_interleave8(p0, p2); // a0e0a1e1... + dct_interleave8(p1, p3); // c0g0c1g1... + + // transpose pass 2 + dct_interleave8(p0, p1); // a0c0e0g0... + dct_interleave8(p2, p3); // b0d0f0h0... + + // transpose pass 3 + dct_interleave8(p0, p2); // a0b0c0d0... + dct_interleave8(p1, p3); // a4b4c4d4... 
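+ // each 128-bit register now holds two 8-byte output rows; storel writes
+ // the low row, and _mm_shuffle_epi32(x, 0x4e) swaps the 64-bit halves so
+ // the high row can be written with the same store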
+ + // store + _mm_storel_epi64((__m128i *) out, p0); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p2); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p1); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride; + _mm_storel_epi64((__m128i *) out, p3); out += out_stride; + _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e)); + } + +#undef dct_const +#undef dct_rot +#undef dct_widen +#undef dct_wadd +#undef dct_wsub +#undef dct_bfly32o +#undef dct_interleave8 +#undef dct_interleave16 +#undef dct_pass +} + +#endif // STBI_SSE2 + +#ifdef STBI_NEON + +// NEON integer IDCT. should produce bit-identical +// results to the generic C version. +static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) +{ + int16x8_t row0, row1, row2, row3, row4, row5, row6, row7; + + int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f)); + int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f)); + int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f)); + int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f)); + int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f)); + int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f)); + int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f)); + int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f)); + int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f)); + int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f)); + int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f)); + int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f)); + +#define dct_long_mul(out, inq, coeff) \ + int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff) + +#define dct_long_mac(out, acc, inq, coeff) \ + int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \ + int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff) + +#define dct_widen(out, inq) \ + int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \ + int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12) + +// wide add +#define dct_wadd(out, a, b) \ + int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vaddq_s32(a##_h, b##_h) + +// wide sub +#define dct_wsub(out, a, b) \ + int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \ + int32x4_t out##_h = vsubq_s32(a##_h, b##_h) + +// butterfly a/b, then shift using "shiftop" by "s" and pack +#define dct_bfly32o(out0,out1, a,b,shiftop,s) \ + { \ + dct_wadd(sum, a, b); \ + dct_wsub(dif, a, b); \ + out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \ + out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \ + } + +#define dct_pass(shiftop, shift) \ + { \ + /* even part */ \ + int16x8_t sum26 = vaddq_s16(row2, row6); \ + dct_long_mul(p1e, sum26, rot0_0); \ + dct_long_mac(t2e, p1e, row6, rot0_1); \ + dct_long_mac(t3e, p1e, row2, rot0_2); \ + int16x8_t sum04 = vaddq_s16(row0, row4); \ + int16x8_t dif04 = vsubq_s16(row0, row4); \ + dct_widen(t0e, sum04); \ + dct_widen(t1e, dif04); \ + dct_wadd(x0, t0e, t3e); \ + dct_wsub(x3, t0e, t3e); \ + dct_wadd(x1, t1e, t2e); \ + dct_wsub(x2, t1e, t2e); \ + /* odd part */ \ + int16x8_t sum15 = vaddq_s16(row1, row5); \ + int16x8_t sum17 = vaddq_s16(row1, row7); \ + int16x8_t sum35 = vaddq_s16(row3, row5); \ + int16x8_t sum37 = vaddq_s16(row3, row7); \ + int16x8_t sumodd = vaddq_s16(sum17, sum35); \ + 
dct_long_mul(p5o, sumodd, rot1_0); \ + dct_long_mac(p1o, p5o, sum17, rot1_1); \ + dct_long_mac(p2o, p5o, sum35, rot1_2); \ + dct_long_mul(p3o, sum37, rot2_0); \ + dct_long_mul(p4o, sum15, rot2_1); \ + dct_wadd(sump13o, p1o, p3o); \ + dct_wadd(sump24o, p2o, p4o); \ + dct_wadd(sump23o, p2o, p3o); \ + dct_wadd(sump14o, p1o, p4o); \ + dct_long_mac(x4, sump13o, row7, rot3_0); \ + dct_long_mac(x5, sump24o, row5, rot3_1); \ + dct_long_mac(x6, sump23o, row3, rot3_2); \ + dct_long_mac(x7, sump14o, row1, rot3_3); \ + dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \ + dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \ + dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \ + dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \ + } + + // load + row0 = vld1q_s16(data + 0*8); + row1 = vld1q_s16(data + 1*8); + row2 = vld1q_s16(data + 2*8); + row3 = vld1q_s16(data + 3*8); + row4 = vld1q_s16(data + 4*8); + row5 = vld1q_s16(data + 5*8); + row6 = vld1q_s16(data + 6*8); + row7 = vld1q_s16(data + 7*8); + + // add DC bias + row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0)); + + // column pass + dct_pass(vrshrn_n_s32, 10); + + // 16bit 8x8 transpose + { +// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively. +// whether compilers actually get this is another story, sadly. +#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); } +#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); } + + // pass 1 + dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6 + dct_trn16(row2, row3); + dct_trn16(row4, row5); + dct_trn16(row6, row7); + + // pass 2 + dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4 + dct_trn32(row1, row3); + dct_trn32(row4, row6); + dct_trn32(row5, row7); + + // pass 3 + dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0 + dct_trn64(row1, row5); + dct_trn64(row2, row6); + dct_trn64(row3, row7); + +#undef dct_trn16 +#undef dct_trn32 +#undef dct_trn64 + } + + // row pass + // vrshrn_n_s32 only supports shifts up to 16, we need + // 17. so do a non-rounding shift of 16 first then follow + // up with a rounding shift by 1. + dct_pass(vshrn_n_s32, 16); + + { + // pack and round + uint8x8_t p0 = vqrshrun_n_s16(row0, 1); + uint8x8_t p1 = vqrshrun_n_s16(row1, 1); + uint8x8_t p2 = vqrshrun_n_s16(row2, 1); + uint8x8_t p3 = vqrshrun_n_s16(row3, 1); + uint8x8_t p4 = vqrshrun_n_s16(row4, 1); + uint8x8_t p5 = vqrshrun_n_s16(row5, 1); + uint8x8_t p6 = vqrshrun_n_s16(row6, 1); + uint8x8_t p7 = vqrshrun_n_s16(row7, 1); + + // again, these can translate into one instruction, but often don't. +#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; } +#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); } +#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); } + + // sadly can't use interleaved stores here since we only write + // 8 bytes to each scan line! 
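+ // so instead: transpose the 8x8 byte block in-register (three vtrn
+ // passes at 8-, 16- and 32-bit granularity), then store one row at a time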
+
+ // 8x8 8-bit transpose pass 1
+ dct_trn8_8(p0, p1);
+ dct_trn8_8(p2, p3);
+ dct_trn8_8(p4, p5);
+ dct_trn8_8(p6, p7);
+
+ // pass 2
+ dct_trn8_16(p0, p2);
+ dct_trn8_16(p1, p3);
+ dct_trn8_16(p4, p6);
+ dct_trn8_16(p5, p7);
+
+ // pass 3
+ dct_trn8_32(p0, p4);
+ dct_trn8_32(p1, p5);
+ dct_trn8_32(p2, p6);
+ dct_trn8_32(p3, p7);
+
+ // store
+ vst1_u8(out, p0); out += out_stride;
+ vst1_u8(out, p1); out += out_stride;
+ vst1_u8(out, p2); out += out_stride;
+ vst1_u8(out, p3); out += out_stride;
+ vst1_u8(out, p4); out += out_stride;
+ vst1_u8(out, p5); out += out_stride;
+ vst1_u8(out, p6); out += out_stride;
+ vst1_u8(out, p7);
+
+#undef dct_trn8_8
+#undef dct_trn8_16
+#undef dct_trn8_32
+ }
+
+#undef dct_long_mul
+#undef dct_long_mac
+#undef dct_widen
+#undef dct_wadd
+#undef dct_wsub
+#undef dct_bfly32o
+#undef dct_pass
+}
+
+#endif // STBI_NEON
+
+#define STBI__MARKER_none 0xff
+// if there's a pending marker from the entropy stream, return that
+// otherwise, fetch from the stream and get a marker. if there's no
+// marker, return 0xff, which is never a valid marker value
+static stbi_uc stbi__get_marker(stbi__jpeg *j)
+{
+ stbi_uc x;
+ if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
+ x = stbi__get8(j->s);
+ if (x != 0xff) return STBI__MARKER_none;
+ while (x == 0xff)
+ x = stbi__get8(j->s); // consume repeated 0xff fill bytes
+ return x;
+}
+
+// in each scan, we'll have scan_n components, and the order
+// of the components is specified by order[]
+#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7)
+
+// after a restart interval, stbi__jpeg_reset the entropy decoder and
+// the dc prediction
+static void stbi__jpeg_reset(stbi__jpeg *j)
+{
+ j->code_bits = 0;
+ j->code_buffer = 0;
+ j->nomore = 0;
+ j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
+ j->marker = STBI__MARKER_none;
+ j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
+ j->eob_run = 0;
+ // no more than 1<<31 MCUs if no restart_interval? that's plenty safe,
+ // since we don't even allow 1<<30 pixels
+}
+
+static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
+{
+ stbi__jpeg_reset(z);
+ if (!z->progressive) {
+ if (z->scan_n == 1) {
+ int i,j;
+ STBI_SIMD_ALIGN(short, data[64]);
+ int n = z->order[0];
+ // non-interleaved data, we just need to process one block at a time,
+ // in trivial scanline order
+ // number of blocks to do just depends on how many actual "pixels" this
+ // component has, independent of interleaved MCU blocking and such
+ int w = (z->img_comp[n].x+7) >> 3;
+ int h = (z->img_comp[n].y+7) >> 3;
+ for (j=0; j < h; ++j) {
+ for (i=0; i < w; ++i) {
+ int ha = z->img_comp[n].ha;
+ if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
+ z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
+ // every data block is an MCU, so countdown the restart interval
+ if (--z->todo <= 0) {
+ if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+ // if it's NOT a restart, then just bail, so we get corrupt data
+ // rather than no data
+ if (!STBI__RESTART(z->marker)) return 1;
+ stbi__jpeg_reset(z);
+ }
+ }
+ }
+ return 1;
+ } else { // interleaved
+ int i,j,k,x,y;
+ STBI_SIMD_ALIGN(short, data[64]);
+ for (j=0; j < z->img_mcu_y; ++j) {
+ for (i=0; i < z->img_mcu_x; ++i) {
+ // scan an interleaved mcu...
process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x)*8; + int y2 = (j*z->img_comp[n].v + y)*8; + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data); + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } else { + if (z->scan_n == 1) { + int i,j; + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + if (z->spec_start == 0) { + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } else { + int ha = z->img_comp[n].ha; + if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha])) + return 0; + } + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } else { // interleaved + int i,j,k,x,y; + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... 
process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x); + int y2 = (j*z->img_comp[n].v + y); + short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w); + if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) + return 0; + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); + if (!STBI__RESTART(z->marker)) return 1; + stbi__jpeg_reset(z); + } + } + } + return 1; + } + } +} + +static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant) +{ + int i; + for (i=0; i < 64; ++i) + data[i] *= dequant[i]; +} + +static void stbi__jpeg_finish(stbi__jpeg *z) +{ + if (z->progressive) { + // dequantize and idct the data + int i,j,n; + for (n=0; n < z->s->img_n; ++n) { + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); + stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]); + z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); + } + } + } + } +} + +static int stbi__process_marker(stbi__jpeg *z, int m) +{ + int L; + switch (m) { + case STBI__MARKER_none: // no marker found + return stbi__err("expected marker","Corrupt JPEG"); + + case 0xDD: // DRI - specify restart interval + if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG"); + z->restart_interval = stbi__get16be(z->s); + return 1; + + case 0xDB: // DQT - define quantization table + L = stbi__get16be(z->s)-2; + while (L > 0) { + int q = stbi__get8(z->s); + int p = q >> 4, sixteen = (p != 0); + int t = q & 15,i; + if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG"); + if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG"); + + for (i=0; i < 64; ++i) + z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s)); + L -= (sixteen ? 
129 : 65); + } + return L==0; + + case 0xC4: // DHT - define huffman table + L = stbi__get16be(z->s)-2; + while (L > 0) { + stbi_uc *v; + int sizes[16],i,n=0; + int q = stbi__get8(z->s); + int tc = q >> 4; + int th = q & 15; + if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG"); + for (i=0; i < 16; ++i) { + sizes[i] = stbi__get8(z->s); + n += sizes[i]; + } + L -= 17; + if (tc == 0) { + if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0; + v = z->huff_dc[th].values; + } else { + if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0; + v = z->huff_ac[th].values; + } + for (i=0; i < n; ++i) + v[i] = stbi__get8(z->s); + if (tc != 0) + stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th); + L -= n; + } + return L==0; + } + + // check for comment block or APP blocks + if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { + L = stbi__get16be(z->s); + if (L < 2) { + if (m == 0xFE) + return stbi__err("bad COM len","Corrupt JPEG"); + else + return stbi__err("bad APP len","Corrupt JPEG"); + } + L -= 2; + + if (m == 0xE0 && L >= 5) { // JFIF APP0 segment + static const unsigned char tag[5] = {'J','F','I','F','\0'}; + int ok = 1; + int i; + for (i=0; i < 5; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 5; + if (ok) + z->jfif = 1; + } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment + static const unsigned char tag[6] = {'A','d','o','b','e','\0'}; + int ok = 1; + int i; + for (i=0; i < 6; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 6; + if (ok) { + stbi__get8(z->s); // version + stbi__get16be(z->s); // flags0 + stbi__get16be(z->s); // flags1 + z->app14_color_transform = stbi__get8(z->s); // color transform + L -= 6; + } + } + + stbi__skip(z->s, L); + return 1; + } + + return stbi__err("unknown marker","Corrupt JPEG"); +} + +// after we see SOS +static int stbi__process_scan_header(stbi__jpeg *z) +{ + int i; + int Ls = stbi__get16be(z->s); + z->scan_n = stbi__get8(z->s); + if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG"); + if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG"); + for (i=0; i < z->scan_n; ++i) { + int id = stbi__get8(z->s), which; + int q = stbi__get8(z->s); + for (which = 0; which < z->s->img_n; ++which) + if (z->img_comp[which].id == id) + break; + if (which == z->s->img_n) return 0; // no match + z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG"); + z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG"); + z->order[i] = which; + } + + { + int aa; + z->spec_start = stbi__get8(z->s); + z->spec_end = stbi__get8(z->s); // should be 63, but might be 0 + aa = stbi__get8(z->s); + z->succ_high = (aa >> 4); + z->succ_low = (aa & 15); + if (z->progressive) { + if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13) + return stbi__err("bad SOS", "Corrupt JPEG"); + } else { + if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG"); + if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG"); + z->spec_end = 63; + } + } + + return 1; +} + +static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why) +{ + int i; + for (i=0; i < ncomp; ++i) { + if (z->img_comp[i].raw_data) { + STBI_FREE(z->img_comp[i].raw_data); + z->img_comp[i].raw_data = NULL; + z->img_comp[i].data = NULL; + } + if (z->img_comp[i].raw_coeff) { + 
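+ // raw_coeff is the unaligned allocation; coeff is just the 16-byte-aligned
+ // view into it, so only the raw pointer is freed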
STBI_FREE(z->img_comp[i].raw_coeff); + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].coeff = 0; + } + if (z->img_comp[i].linebuf) { + STBI_FREE(z->img_comp[i].linebuf); + z->img_comp[i].linebuf = NULL; + } + } + return why; +} + +static int stbi__process_frame_header(stbi__jpeg *z, int scan) +{ + stbi__context *s = z->s; + int Lf,p,i,q, h_max=1,v_max=1,c; + Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG + p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline + s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG + s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + c = stbi__get8(s); + if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG"); + s->img_n = c; + for (i=0; i < c; ++i) { + z->img_comp[i].data = NULL; + z->img_comp[i].linebuf = NULL; + } + + if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG"); + + z->rgb = 0; + for (i=0; i < s->img_n; ++i) { + static const unsigned char rgb[3] = { 'R', 'G', 'B' }; + z->img_comp[i].id = stbi__get8(s); + if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) + ++z->rgb; + q = stbi__get8(s); + z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG"); + z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG"); + z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG"); + } + + if (scan != STBI__SCAN_load) return 1; + + if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode"); + + for (i=0; i < s->img_n; ++i) { + if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; + if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; + } + + // compute interleaved mcu info + z->img_h_max = h_max; + z->img_v_max = v_max; + z->img_mcu_w = h_max * 8; + z->img_mcu_h = v_max * 8; + // these sizes can't be more than 17 bits + z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; + z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; + + for (i=0; i < s->img_n; ++i) { + // number of effective pixels (e.g. for non-interleaved MCU) + z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; + z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; + // to simplify generation, we'll allocate enough memory to decode + // the bogus oversized data from using interleaved MCUs and their + // big blocks (e.g. 
a 16x16 iMCU on an image of width 33); we won't + // discard the extra data until colorspace conversion + // + // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier) + // so these muls can't overflow with 32-bit ints (which we require) + z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; + z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; + z->img_comp[i].coeff = 0; + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].linebuf = NULL; + z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); + if (z->img_comp[i].raw_data == NULL) + return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); + // align blocks for idct using mmx/sse + z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); + if (z->progressive) { + // w2, h2 are multiples of 8 (see above) + z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8; + z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8; + z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15); + if (z->img_comp[i].raw_coeff == NULL) + return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); + z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15); + } + } + + return 1; +} + +// use comparisons since in some cases we handle more than one case (e.g. SOF) +#define stbi__DNL(x) ((x) == 0xdc) +#define stbi__SOI(x) ((x) == 0xd8) +#define stbi__EOI(x) ((x) == 0xd9) +#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2) +#define stbi__SOS(x) ((x) == 0xda) + +#define stbi__SOF_progressive(x) ((x) == 0xc2) + +static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) +{ + int m; + z->jfif = 0; + z->app14_color_transform = -1; // valid values are 0,1,2 + z->marker = STBI__MARKER_none; // initialize cached marker to empty + m = stbi__get_marker(z); + if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG"); + if (scan == STBI__SCAN_type) return 1; + m = stbi__get_marker(z); + while (!stbi__SOF(m)) { + if (!stbi__process_marker(z,m)) return 0; + m = stbi__get_marker(z); + while (m == STBI__MARKER_none) { + // some files have extra padding after their blocks, so ok, we'll scan + if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG"); + m = stbi__get_marker(z); + } + } + z->progressive = stbi__SOF_progressive(m); + if (!stbi__process_frame_header(z, scan)) return 0; + return 1; +} + +// decode image to YCbCr format +static int stbi__decode_jpeg_image(stbi__jpeg *j) +{ + int m; + for (m = 0; m < 4; m++) { + j->img_comp[m].raw_data = NULL; + j->img_comp[m].raw_coeff = NULL; + } + j->restart_interval = 0; + if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0; + m = stbi__get_marker(j); + while (!stbi__EOI(m)) { + if (stbi__SOS(m)) { + if (!stbi__process_scan_header(j)) return 0; + if (!stbi__parse_entropy_coded_data(j)) return 0; + if (j->marker == STBI__MARKER_none ) { + // handle 0s at the end of image data from IP Kamera 9060 + while (!stbi__at_eof(j->s)) { + int x = stbi__get8(j->s); + if (x == 255) { + j->marker = stbi__get8(j->s); + break; + } + } + // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0 + } + } else if (stbi__DNL(m)) { + int Ld = stbi__get16be(j->s); + stbi__uint32 NL = stbi__get16be(j->s); + if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG"); + if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); + } else { + if (!stbi__process_marker(j, 
m)) return 0; + } + m = stbi__get_marker(j); + } + if (j->progressive) + stbi__jpeg_finish(j); + return 1; +} + +// static jfif-centered resampling (across block boundaries) + +typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1, + int w, int hs); + +#define stbi__div4(x) ((stbi_uc) ((x) >> 2)) + +static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + STBI_NOTUSED(out); + STBI_NOTUSED(in_far); + STBI_NOTUSED(w); + STBI_NOTUSED(hs); + return in_near; +} + +static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples vertically for every one in input + int i; + STBI_NOTUSED(hs); + for (i=0; i < w; ++i) + out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2); + return out; +} + +static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate two samples horizontally for every one in input + int i; + stbi_uc *input = in_near; + + if (w == 1) { + // if only one sample, can't do any interpolation + out[0] = out[1] = input[0]; + return out; + } + + out[0] = input[0]; + out[1] = stbi__div4(input[0]*3 + input[1] + 2); + for (i=1; i < w-1; ++i) { + int n = 3*input[i]+2; + out[i*2+0] = stbi__div4(n+input[i-1]); + out[i*2+1] = stbi__div4(n+input[i+1]); + } + out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2); + out[i*2+1] = input[w-1]; + + STBI_NOTUSED(in_far); + STBI_NOTUSED(hs); + + return out; +} + +#define stbi__div16(x) ((stbi_uc) ((x) >> 4)) + +static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i,t0,t1; + if (w == 1) { + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + out[0] = stbi__div4(t1+2); + for (i=1; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i=0,t0,t1; + + if (w == 1) { + out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + // process groups of 8 pixels for as long as we can. + // note we can't handle the last pixel in a row in this loop + // because we need to handle the filter boundary conditions. + for (; i < ((w-1) & ~7); i += 8) { +#if defined(STBI_SSE2) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + __m128i zero = _mm_setzero_si128(); + __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i)); + __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i)); + __m128i farw = _mm_unpacklo_epi8(farb, zero); + __m128i nearw = _mm_unpacklo_epi8(nearb, zero); + __m128i diff = _mm_sub_epi16(farw, nearw); + __m128i nears = _mm_slli_epi16(nearw, 2); + __m128i curr = _mm_add_epi16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. 
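+ // (note: _mm_slli_si128 shifts by bytes toward higher lanes, so this
+ // 2-byte shift moves each 16-bit pixel up one lane; lane i then holds
+ // pixel i-1, which is the "previous pixel" view described above)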
+ __m128i prv0 = _mm_slli_si128(curr, 2); + __m128i nxt0 = _mm_srli_si128(curr, 2); + __m128i prev = _mm_insert_epi16(prv0, t1, 0); + __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. + __m128i bias = _mm_set1_epi16(8); + __m128i curs = _mm_slli_epi16(curr, 2); + __m128i prvd = _mm_sub_epi16(prev, curr); + __m128i nxtd = _mm_sub_epi16(next, curr); + __m128i curb = _mm_add_epi16(curs, bias); + __m128i even = _mm_add_epi16(prvd, curb); + __m128i odd = _mm_add_epi16(nxtd, curb); + + // interleave even and odd pixels, then undo scaling. + __m128i int0 = _mm_unpacklo_epi16(even, odd); + __m128i int1 = _mm_unpackhi_epi16(even, odd); + __m128i de0 = _mm_srli_epi16(int0, 4); + __m128i de1 = _mm_srli_epi16(int1, 4); + + // pack and write output + __m128i outv = _mm_packus_epi16(de0, de1); + _mm_storeu_si128((__m128i *) (out + i*2), outv); +#elif defined(STBI_NEON) + // load and perform the vertical filtering pass + // this uses 3*x + y = 4*x + (y - x) + uint8x8_t farb = vld1_u8(in_far + i); + uint8x8_t nearb = vld1_u8(in_near + i); + int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb)); + int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2)); + int16x8_t curr = vaddq_s16(nears, diff); // current row + + // horizontal filter works the same based on shifted vers of current + // row. "prev" is current row shifted right by 1 pixel; we need to + // insert the previous pixel value (from t1). + // "next" is current row shifted left by 1 pixel, with first pixel + // of next block of 8 pixels added in. + int16x8_t prv0 = vextq_s16(curr, curr, 7); + int16x8_t nxt0 = vextq_s16(curr, curr, 1); + int16x8_t prev = vsetq_lane_s16(t1, prv0, 0); + int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7); + + // horizontal filter, polyphase implementation since it's convenient: + // even pixels = 3*cur + prev = cur*4 + (prev - cur) + // odd pixels = 3*cur + next = cur*4 + (next - cur) + // note the shared term. 
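+ // i.e. out_even = 3*cur + prev and out_odd = 3*cur + next, still at 16x
+ // scale; unlike the SSE2 path there is no explicit +8 bias here, because
+ // the vqrshrun_n_s16(..., 4) narrowing below rounds as it shifts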
+ int16x8_t curs = vshlq_n_s16(curr, 2); + int16x8_t prvd = vsubq_s16(prev, curr); + int16x8_t nxtd = vsubq_s16(next, curr); + int16x8_t even = vaddq_s16(curs, prvd); + int16x8_t odd = vaddq_s16(curs, nxtd); + + // undo scaling and round, then store with even/odd phases interleaved + uint8x8x2_t o; + o.val[0] = vqrshrun_n_s16(even, 4); + o.val[1] = vqrshrun_n_s16(odd, 4); + vst2_u8(out + i*2, o); +#endif + + // "previous" value for next iter + t1 = 3*in_near[i+7] + in_far[i+7]; + } + + t0 = t1; + t1 = 3*in_near[i] + in_far[i]; + out[i*2] = stbi__div16(3*t1 + t0 + 8); + + for (++i; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = stbi__div16(3*t0 + t1 + 8); + out[i*2 ] = stbi__div16(3*t1 + t0 + 8); + } + out[w*2-1] = stbi__div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} +#endif + +static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) +{ + // resample with nearest-neighbor + int i,j; + STBI_NOTUSED(in_far); + for (i=0; i < w; ++i) + for (j=0; j < hs; ++j) + out[i*hs+j] = in_near[i]; + return out; +} + +// this is a reduced-precision calculation of YCbCr-to-RGB introduced +// to make sure the code produces the same results in both SIMD and scalar +#define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8) +static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step) +{ + int i; + for (i=0; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} + +#if defined(STBI_SSE2) || defined(STBI_NEON) +static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step) +{ + int i = 0; + +#ifdef STBI_SSE2 + // step == 3 is pretty ugly on the final interleave, and i'm not convinced + // it's useful in practice (you wouldn't use it for textures, for example). + // so just accelerate step == 4 case. + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. 
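+ // fixed-point scheme: y is unpacked into the high byte (y*256, plus a
+ // +128 rounding bias from y_bias in the low byte) and shifted down to a
+ // 1<<4 scale; cr/cb become (c-128)*256, so _mm_mulhi_epi16 against the
+ // round(k*4096) constants lands on the same 1<<4 scale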
+ __m128i signflip = _mm_set1_epi8(-0x80); + __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f)); + __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f)); + __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f)); + __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f)); + __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128); + __m128i xw = _mm_set1_epi16(255); // alpha channel + + for (; i+7 < count; i += 8) { + // load + __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i)); + __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i)); + __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i)); + __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128 + __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128 + + // unpack to short (and left-shift cr, cb by 8) + __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes); + __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased); + __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased); + + // color transform + __m128i yws = _mm_srli_epi16(yw, 4); + __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw); + __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw); + __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1); + __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1); + __m128i rws = _mm_add_epi16(cr0, yws); + __m128i gwt = _mm_add_epi16(cb0, yws); + __m128i bws = _mm_add_epi16(yws, cb1); + __m128i gws = _mm_add_epi16(gwt, cr1); + + // descale + __m128i rw = _mm_srai_epi16(rws, 4); + __m128i bw = _mm_srai_epi16(bws, 4); + __m128i gw = _mm_srai_epi16(gws, 4); + + // back to byte, set up for transpose + __m128i brb = _mm_packus_epi16(rw, bw); + __m128i gxb = _mm_packus_epi16(gw, xw); + + // transpose to interleave channels + __m128i t0 = _mm_unpacklo_epi8(brb, gxb); + __m128i t1 = _mm_unpackhi_epi8(brb, gxb); + __m128i o0 = _mm_unpacklo_epi16(t0, t1); + __m128i o1 = _mm_unpackhi_epi16(t0, t1); + + // store + _mm_storeu_si128((__m128i *) (out + 0), o0); + _mm_storeu_si128((__m128i *) (out + 16), o1); + out += 32; + } + } +#endif + +#ifdef STBI_NEON + // in this version, step=3 support would be easy to add. but is there demand? + if (step == 4) { + // this is a fairly straightforward implementation and not super-optimized. 
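+ // same 1<<4 fixed-point scale as the SSE2 path, but via vqdmulhq_s16,
+ // which computes (2*a*b)>>16 -- hence cr/cb are widened by 7 bits rather
+ // than 8, and vqrshrun_n_s16(..., 4) does the rounding shift, saturation
+ // and narrowing in one step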
+ uint8x8_t signflip = vdup_n_u8(0x80); + int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f)); + int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f)); + int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f)); + int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f)); + + for (; i+7 < count; i += 8) { + // load + uint8x8_t y_bytes = vld1_u8(y + i); + uint8x8_t cr_bytes = vld1_u8(pcr + i); + uint8x8_t cb_bytes = vld1_u8(pcb + i); + int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip)); + int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip)); + + // expand to s16 + int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4)); + int16x8_t crw = vshll_n_s8(cr_biased, 7); + int16x8_t cbw = vshll_n_s8(cb_biased, 7); + + // color transform + int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0); + int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0); + int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1); + int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1); + int16x8_t rws = vaddq_s16(yws, cr0); + int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1); + int16x8_t bws = vaddq_s16(yws, cb1); + + // undo scaling, round, convert to byte + uint8x8x4_t o; + o.val[0] = vqrshrun_n_s16(rws, 4); + o.val[1] = vqrshrun_n_s16(gws, 4); + o.val[2] = vqrshrun_n_s16(bws, 4); + o.val[3] = vdup_n_u8(255); + + // store, interleaving r/g/b/a + vst4_u8(out, o); + out += 8*4; + } + } +#endif + + for (; i < count; ++i) { + int y_fixed = (y[i] << 20) + (1<<19); // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); + r >>= 20; + g >>= 20; + b >>= 20; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (stbi_uc)r; + out[1] = (stbi_uc)g; + out[2] = (stbi_uc)b; + out[3] = 255; + out += step; + } +} +#endif + +// set up the kernels +static void stbi__setup_jpeg(stbi__jpeg *j) +{ + j->idct_block_kernel = stbi__idct_block; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2; + +#ifdef STBI_SSE2 + if (stbi__sse2_available()) { + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; + } +#endif + +#ifdef STBI_NEON + j->idct_block_kernel = stbi__idct_simd; + j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; + j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; +#endif +} + +// clean up the temporary component buffers +static void stbi__cleanup_jpeg(stbi__jpeg *j) +{ + stbi__free_jpeg_components(j, j->s->img_n, 0); +} + +typedef struct +{ + resample_row_func resample; + stbi_uc *line0,*line1; + int hs,vs; // expansion factor in each axis + int w_lores; // horizontal pixels pre-expansion + int ystep; // how far through vertical expansion we are + int ypos; // which pre-expansion row we're on +} stbi__resample; + +// fast 0..255 * 0..255 => 0..255 rounded multiplication +static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y) +{ + unsigned int t = x*y + 128; + return (stbi_uc) ((t + (t >>8)) >> 8); +} + +static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) +{ + int n, decode_n, is_rgb; + z->s->img_n = 0; // make 
stbi__cleanup_jpeg safe
+
+ // validate req_comp
+ if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
+
+ // load a jpeg image from whichever source, but leave in YCbCr format
+ if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
+
+ // determine actual number of components to generate
+ n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
+
+ is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
+
+ if (z->s->img_n == 3 && n < 3 && !is_rgb)
+ decode_n = 1;
+ else
+ decode_n = z->s->img_n;
+
+ // nothing to do if no components requested; check this now to avoid
+ // accessing uninitialized coutput[0] later
+ if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; }
+
+ // resample and color-convert
+ {
+ int k;
+ unsigned int i,j;
+ stbi_uc *output;
+ stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL };
+
+ stbi__resample res_comp[4];
+
+ for (k=0; k < decode_n; ++k) {
+ stbi__resample *r = &res_comp[k];
+
+ // allocate line buffer big enough for upsampling off the edges
+ // with upsample factor of 4
+ z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
+ if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
+
+ r->hs = z->img_h_max / z->img_comp[k].h;
+ r->vs = z->img_v_max / z->img_comp[k].v;
+ r->ystep = r->vs >> 1;
+ r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
+ r->ypos = 0;
+ r->line0 = r->line1 = z->img_comp[k].data;
+
+ if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
+ else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
+ else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
+ else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
+ else r->resample = stbi__resample_row_generic;
+ }
+
+ // can't error after this, so this is safe
+ output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
+ if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
+
+ // now go ahead and resample
+ for (j=0; j < z->s->img_y; ++j) {
+ stbi_uc *out = output + n * z->s->img_x * j;
+ for (k=0; k < decode_n; ++k) {
+ stbi__resample *r = &res_comp[k];
+ int y_bot = r->ystep >= (r->vs >> 1);
+ coutput[k] = r->resample(z->img_comp[k].linebuf,
+ y_bot ? r->line1 : r->line0,
+ y_bot ?
r->line0 : r->line1, + r->w_lores, r->hs); + if (++r->ystep >= r->vs) { + r->ystep = 0; + r->line0 = r->line1; + if (++r->ypos < z->img_comp[k].y) + r->line1 += z->img_comp[k].w2; + } + } + if (n >= 3) { + stbi_uc *y = coutput[0]; + if (z->s->img_n == 3) { + if (is_rgb) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = y[i]; + out[1] = coutput[1][i]; + out[2] = coutput[2][i]; + out[3] = 255; + out += n; + } + } else { + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else if (z->s->img_n == 4) { + if (z->app14_color_transform == 0) { // CMYK + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(coutput[0][i], m); + out[1] = stbi__blinn_8x8(coutput[1][i], m); + out[2] = stbi__blinn_8x8(coutput[2][i], m); + out[3] = 255; + out += n; + } + } else if (z->app14_color_transform == 2) { // YCCK + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(255 - out[0], m); + out[1] = stbi__blinn_8x8(255 - out[1], m); + out[2] = stbi__blinn_8x8(255 - out[2], m); + out += n; + } + } else { // YCbCr + alpha? Ignore the fourth channel for now + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } + } else + for (i=0; i < z->s->img_x; ++i) { + out[0] = out[1] = out[2] = y[i]; + out[3] = 255; // not used if n==3 + out += n; + } + } else { + if (is_rgb) { + if (n == 1) + for (i=0; i < z->s->img_x; ++i) + *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + else { + for (i=0; i < z->s->img_x; ++i, out += 2) { + out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + out[1] = 255; + } + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 0) { + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + stbi_uc r = stbi__blinn_8x8(coutput[0][i], m); + stbi_uc g = stbi__blinn_8x8(coutput[1][i], m); + stbi_uc b = stbi__blinn_8x8(coutput[2][i], m); + out[0] = stbi__compute_y(r, g, b); + out[1] = 255; + out += n; + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 2) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]); + out[1] = 255; + out += n; + } + } else { + stbi_uc *y = coutput[0]; + if (n == 1) + for (i=0; i < z->s->img_x; ++i) out[i] = y[i]; + else + for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; } + } + } + } + stbi__cleanup_jpeg(z); + *out_x = z->s->img_x; + *out_y = z->s->img_y; + if (comp) *comp = z->s->img_n >= 3 ? 
3 : 1; // report original components, not output
+ return output;
+ }
+}
+
+static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
+{
+ unsigned char* result;
+ stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
+ if (!j) return stbi__errpuc("outofmem", "Out of memory");
+ STBI_NOTUSED(ri);
+ j->s = s;
+ stbi__setup_jpeg(j);
+ result = load_jpeg_image(j, x,y,comp,req_comp);
+ STBI_FREE(j);
+ return result;
+}
+
+static int stbi__jpeg_test(stbi__context *s)
+{
+ int r;
+ stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
+ if (!j) return stbi__err("outofmem", "Out of memory");
+ j->s = s;
+ stbi__setup_jpeg(j);
+ r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
+ stbi__rewind(s);
+ STBI_FREE(j);
+ return r;
+}
+
+static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
+{
+ if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
+ stbi__rewind( j->s );
+ return 0;
+ }
+ if (x) *x = j->s->img_x;
+ if (y) *y = j->s->img_y;
+ if (comp) *comp = j->s->img_n >= 3 ? 3 : 1;
+ return 1;
+}
+
+static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
+{
+ int result;
+ stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
+ if (!j) return stbi__err("outofmem", "Out of memory");
+ j->s = s;
+ result = stbi__jpeg_info_raw(j, x, y, comp);
+ STBI_FREE(j);
+ return result;
+}
+#endif
+
+// public domain zlib decode v0.2 Sean Barrett 2006-11-18
+// simple implementation
+// - all input must be provided in an upfront buffer
+// - all output is written to a single output buffer (can malloc/realloc)
+// performance
+// - fast huffman
+
+#ifndef STBI_NO_ZLIB
+
+// fast-way is faster to check than jpeg huffman, but slow way is slower
+#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables
+#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1)
+#define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet
+
+// zlib-style huffman encoding
+// (jpegs pack from left, zlib from right, so can't share code)
+typedef struct
+{
+ stbi__uint16 fast[1 << STBI__ZFAST_BITS];
+ stbi__uint16 firstcode[16];
+ int maxcode[17];
+ stbi__uint16 firstsymbol[16];
+ stbi_uc size[STBI__ZNSYMS];
+ stbi__uint16 value[STBI__ZNSYMS];
+} stbi__zhuffman;
+
+stbi_inline static int stbi__bitreverse16(int n)
+{
+ n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
+ n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
+ n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
+ n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
+ return n;
+}
+
+stbi_inline static int stbi__bit_reverse(int v, int bits)
+{
+ STBI_ASSERT(bits <= 16);
+ // to bit reverse n bits, reverse 16 and shift
+ // e.g.
11 bits, bit reverse and shift away 5 + return stbi__bitreverse16(v) >> (16-bits); +} + +static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num) +{ + int i,k=0; + int code, next_code[16], sizes[17]; + + // DEFLATE spec for generating codes + memset(sizes, 0, sizeof(sizes)); + memset(z->fast, 0, sizeof(z->fast)); + for (i=0; i < num; ++i) + ++sizes[sizelist[i]]; + sizes[0] = 0; + for (i=1; i < 16; ++i) + if (sizes[i] > (1 << i)) + return stbi__err("bad sizes", "Corrupt PNG"); + code = 0; + for (i=1; i < 16; ++i) { + next_code[i] = code; + z->firstcode[i] = (stbi__uint16) code; + z->firstsymbol[i] = (stbi__uint16) k; + code = (code + sizes[i]); + if (sizes[i]) + if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG"); + z->maxcode[i] = code << (16-i); // preshift for inner loop + code <<= 1; + k += sizes[i]; + } + z->maxcode[16] = 0x10000; // sentinel + for (i=0; i < num; ++i) { + int s = sizelist[i]; + if (s) { + int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; + stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i); + z->size [c] = (stbi_uc ) s; + z->value[c] = (stbi__uint16) i; + if (s <= STBI__ZFAST_BITS) { + int j = stbi__bit_reverse(next_code[s],s); + while (j < (1 << STBI__ZFAST_BITS)) { + z->fast[j] = fastv; + j += (1 << s); + } + } + ++next_code[s]; + } + } + return 1; +} + +// zlib-from-memory implementation for PNG reading +// because PNG allows splitting the zlib stream arbitrarily, +// and it's annoying structurally to have PNG call ZLIB call PNG, +// we require PNG read all the IDATs and combine them into a single +// memory buffer + +typedef struct +{ + stbi_uc *zbuffer, *zbuffer_end; + int num_bits; + stbi__uint32 code_buffer; + + char *zout; + char *zout_start; + char *zout_end; + int z_expandable; + + stbi__zhuffman z_length, z_distance; +} stbi__zbuf; + +stbi_inline static int stbi__zeof(stbi__zbuf *z) +{ + return (z->zbuffer >= z->zbuffer_end); +} + +stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z) +{ + return stbi__zeof(z) ? 0 : *z->zbuffer++; +} + +static void stbi__fill_bits(stbi__zbuf *z) +{ + do { + if (z->code_buffer >= (1U << z->num_bits)) { + z->zbuffer = z->zbuffer_end; /* treat this as EOF so we fail. */ + return; + } + z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits; + z->num_bits += 8; + } while (z->num_bits <= 24); +} + +stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n) +{ + unsigned int k; + if (z->num_bits < n) stbi__fill_bits(z); + k = z->code_buffer & ((1 << n) - 1); + z->code_buffer >>= n; + z->num_bits -= n; + return k; +} + +static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s,k; + // not resolved by fast table, so compute it the slow way + // use jpeg approach, which requires MSbits at top + k = stbi__bit_reverse(a->code_buffer, 16); + for (s=STBI__ZFAST_BITS+1; ; ++s) + if (k < z->maxcode[s]) + break; + if (s >= 16) return -1; // invalid code! + // code size is s, so: + b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; + if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere! + if (z->size[b] != s) return -1; // was originally an assert, but report failure instead. + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; +} + +stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) +{ + int b,s; + if (a->num_bits < 16) { + if (stbi__zeof(a)) { + return -1; /* report error for unexpected end of data. 
*/ + } + stbi__fill_bits(a); + } + b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; + if (b) { + s = b >> 9; + a->code_buffer >>= s; + a->num_bits -= s; + return b & 511; + } + return stbi__zhuffman_decode_slowpath(a, z); +} + +static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes +{ + char *q; + unsigned int cur, limit, old_limit; + z->zout = zout; + if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG"); + cur = (unsigned int) (z->zout - z->zout_start); + limit = old_limit = (unsigned) (z->zout_end - z->zout_start); + if (UINT_MAX - cur < (unsigned) n) return stbi__err("outofmem", "Out of memory"); + while (cur + n > limit) { + if(limit > UINT_MAX / 2) return stbi__err("outofmem", "Out of memory"); + limit *= 2; + } + q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit); + STBI_NOTUSED(old_limit); + if (q == NULL) return stbi__err("outofmem", "Out of memory"); + z->zout_start = q; + z->zout = q + cur; + z->zout_end = q + limit; + return 1; +} + +static const int stbi__zlength_base[31] = { + 3,4,5,6,7,8,9,10,11,13, + 15,17,19,23,27,31,35,43,51,59, + 67,83,99,115,131,163,195,227,258,0,0 }; + +static const int stbi__zlength_extra[31]= +{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + +static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, +257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + +static const int stbi__zdist_extra[32] = +{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +static int stbi__parse_huffman_block(stbi__zbuf *a) +{ + char *zout = a->zout; + for(;;) { + int z = stbi__zhuffman_decode(a, &a->z_length); + if (z < 256) { + if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes + if (zout >= a->zout_end) { + if (!stbi__zexpand(a, zout, 1)) return 0; + zout = a->zout; + } + *zout++ = (char) z; + } else { + stbi_uc *p; + int len,dist; + if (z == 256) { + a->zout = zout; + return 1; + } + z -= 257; + len = stbi__zlength_base[z]; + if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]); + z = stbi__zhuffman_decode(a, &a->z_distance); + if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); + dist = stbi__zdist_base[z]; + if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); + if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); + if (zout + len > a->zout_end) { + if (!stbi__zexpand(a, zout, len)) return 0; + zout = a->zout; + } + p = (stbi_uc *) (zout - dist); + if (dist == 1) { // run of one byte; common in images. 
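+ // (when dist < len the match overlaps its own output; the else branch
+ // below copies byte-by-byte so earlier output bytes feed later ones, and
+ // dist == 1 is just the fully degenerate fill-with-one-byte case)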
+ stbi_uc v = *p; + if (len) { do *zout++ = v; while (--len); } + } else { + if (len) { do *zout++ = *p++; while (--len); } + } + } + } +} + +static int stbi__compute_huffman_codes(stbi__zbuf *a) +{ + static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + stbi__zhuffman z_codelength; + stbi_uc lencodes[286+32+137];//padding for maximum single op + stbi_uc codelength_sizes[19]; + int i,n; + + int hlit = stbi__zreceive(a,5) + 257; + int hdist = stbi__zreceive(a,5) + 1; + int hclen = stbi__zreceive(a,4) + 4; + int ntot = hlit + hdist; + + memset(codelength_sizes, 0, sizeof(codelength_sizes)); + for (i=0; i < hclen; ++i) { + int s = stbi__zreceive(a,3); + codelength_sizes[length_dezigzag[i]] = (stbi_uc) s; + } + if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; + + n = 0; + while (n < ntot) { + int c = stbi__zhuffman_decode(a, &z_codelength); + if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG"); + if (c < 16) + lencodes[n++] = (stbi_uc) c; + else { + stbi_uc fill = 0; + if (c == 16) { + c = stbi__zreceive(a,2)+3; + if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG"); + fill = lencodes[n-1]; + } else if (c == 17) { + c = stbi__zreceive(a,3)+3; + } else if (c == 18) { + c = stbi__zreceive(a,7)+11; + } else { + return stbi__err("bad codelengths", "Corrupt PNG"); + } + if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); + memset(lencodes+n, fill, c); + n += c; + } + } + if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG"); + if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; + return 1; +} + +static int stbi__parse_uncompressed_block(stbi__zbuf *a) +{ + stbi_uc header[4]; + int len,nlen,k; + if (a->num_bits & 7) + stbi__zreceive(a, a->num_bits & 7); // discard + // drain the bit-packed data into header + k = 0; + while (a->num_bits > 0) { + header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check + a->code_buffer >>= 8; + a->num_bits -= 8; + } + if (a->num_bits < 0) return stbi__err("zlib corrupt","Corrupt PNG"); + // now fill header the normal way + while (k < 4) + header[k++] = stbi__zget8(a); + len = header[1] * 256 + header[0]; + nlen = header[3] * 256 + header[2]; + if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG"); + if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG"); + if (a->zout + len > a->zout_end) + if (!stbi__zexpand(a, a->zout, len)) return 0; + memcpy(a->zout, a->zbuffer, len); + a->zbuffer += len; + a->zout += len; + return 1; +} + +static int stbi__parse_zlib_header(stbi__zbuf *a) +{ + int cmf = stbi__zget8(a); + int cm = cmf & 15; + /* int cinfo = cmf >> 4; */ + int flg = stbi__zget8(a); + if (stbi__zeof(a)) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec + if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec + if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png + if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png + // window = 1 << (8 + cinfo)... 
but who cares, we fully buffer output + return 1; +} + +static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] = +{ + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8 +}; +static const stbi_uc stbi__zdefault_distance[32] = +{ + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 +}; +/* +Init algorithm: +{ + int i; // use <= to match clearly with spec + for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8; + for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9; + for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7; + for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8; + + for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5; +} +*/ + +static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) +{ + int final, type; + if (parse_header) + if (!stbi__parse_zlib_header(a)) return 0; + a->num_bits = 0; + a->code_buffer = 0; + do { + final = stbi__zreceive(a,1); + type = stbi__zreceive(a,2); + if (type == 0) { + if (!stbi__parse_uncompressed_block(a)) return 0; + } else if (type == 3) { + return 0; + } else { + if (type == 1) { + // use fixed code lengths + if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , STBI__ZNSYMS)) return 0; + if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0; + } else { + if (!stbi__compute_huffman_codes(a)) return 0; + } + if (!stbi__parse_huffman_block(a)) return 0; + } + } while (!final); + return 1; +} + +static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header) +{ + a->zout_start = obuf; + a->zout = obuf; + a->zout_end = obuf + olen; + a->z_expandable = exp; + + return stbi__parse_zlib(a, parse_header); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer + len; + if (stbi__do_zlib(&a, p, initial_size, 1, 1)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) +{ + return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); +} + +STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer + len; + if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) +{ + stbi__zbuf a; + a.zbuffer = (stbi_uc *) ibuffer; + a.zbuffer_end = (stbi_uc *) ibuffer + 
ilen; + if (stbi__do_zlib(&a, obuffer, olen, 0, 1)) + return (int) (a.zout - a.zout_start); + else + return -1; +} + +STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen) +{ + stbi__zbuf a; + char *p = (char *) stbi__malloc(16384); + if (p == NULL) return NULL; + a.zbuffer = (stbi_uc *) buffer; + a.zbuffer_end = (stbi_uc *) buffer+len; + if (stbi__do_zlib(&a, p, 16384, 1, 0)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + STBI_FREE(a.zout_start); + return NULL; + } +} + +STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) +{ + stbi__zbuf a; + a.zbuffer = (stbi_uc *) ibuffer; + a.zbuffer_end = (stbi_uc *) ibuffer + ilen; + if (stbi__do_zlib(&a, obuffer, olen, 0, 0)) + return (int) (a.zout - a.zout_start); + else + return -1; +} +#endif + +// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 +// simple implementation +// - only 8-bit samples +// - no CRC checking +// - allocates lots of intermediate memory +// - avoids problem of streaming data between subsystems +// - avoids explicit window management +// performance +// - uses stb_zlib, a PD zlib implementation with fast huffman decoding + +#ifndef STBI_NO_PNG +typedef struct +{ + stbi__uint32 length; + stbi__uint32 type; +} stbi__pngchunk; + +static stbi__pngchunk stbi__get_chunk_header(stbi__context *s) +{ + stbi__pngchunk c; + c.length = stbi__get32be(s); + c.type = stbi__get32be(s); + return c; +} + +static int stbi__check_png_header(stbi__context *s) +{ + static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 }; + int i; + for (i=0; i < 8; ++i) + if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG"); + return 1; +} + +typedef struct +{ + stbi__context *s; + stbi_uc *idata, *expanded, *out; + int depth; +} stbi__png; + + +enum { + STBI__F_none=0, + STBI__F_sub=1, + STBI__F_up=2, + STBI__F_avg=3, + STBI__F_paeth=4, + // synthetic filters used for first scanline to avoid needing a dummy row of 0s + STBI__F_avg_first, + STBI__F_paeth_first +}; + +static stbi_uc first_row_filter[5] = +{ + STBI__F_none, + STBI__F_sub, + STBI__F_none, + STBI__F_avg_first, + STBI__F_paeth_first +}; + +static int stbi__paeth(int a, int b, int c) +{ + int p = a + b - c; + int pa = abs(p-a); + int pb = abs(p-b); + int pc = abs(p-c); + if (pa <= pb && pa <= pc) return a; + if (pb <= pc) return b; + return c; +} + +static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; + +// create the png data from post-deflated data +static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) +{ + int bytes = (depth == 16? 
2 : 1); + stbi__context *s = a->s; + stbi__uint32 i,j,stride = x*out_n*bytes; + stbi__uint32 img_len, img_width_bytes; + int k; + int img_n = s->img_n; // copy it into a local for later + + int output_bytes = out_n*bytes; + int filter_bytes = img_n*bytes; + int width = x; + + STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1); + a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into + if (!a->out) return stbi__err("outofmem", "Out of memory"); + + if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); + img_width_bytes = (((img_n * x * depth) + 7) >> 3); + img_len = (img_width_bytes + 1) * y; + + // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, + // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros), + // so just check for raw_len < img_len always. + if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); + + for (j=0; j < y; ++j) { + stbi_uc *cur = a->out + stride*j; + stbi_uc *prior; + int filter = *raw++; + + if (filter > 4) + return stbi__err("invalid filter","Corrupt PNG"); + + if (depth < 8) { + if (img_width_bytes > x) return stbi__err("invalid width","Corrupt PNG"); + cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place + filter_bytes = 1; + width = img_width_bytes; + } + prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above + + // if first row, use special filter that doesn't sample previous row + if (j == 0) filter = first_row_filter[filter]; + + // handle first byte explicitly + for (k=0; k < filter_bytes; ++k) { + switch (filter) { + case STBI__F_none : cur[k] = raw[k]; break; + case STBI__F_sub : cur[k] = raw[k]; break; + case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; + case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; + case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; + case STBI__F_avg_first : cur[k] = raw[k]; break; + case STBI__F_paeth_first: cur[k] = raw[k]; break; + } + } + + if (depth == 8) { + if (img_n != out_n) + cur[img_n] = 255; // first pixel + raw += img_n; + cur += out_n; + prior += out_n; + } else if (depth == 16) { + if (img_n != out_n) { + cur[filter_bytes] = 255; // first pixel top byte + cur[filter_bytes+1] = 255; // first pixel bottom byte + } + raw += filter_bytes; + cur += output_bytes; + prior += output_bytes; + } else { + raw += 1; + cur += 1; + prior += 1; + } + + // this is a little gross, so that we don't switch per-pixel or per-component + if (depth < 8 || img_n == out_n) { + int nk = (width - 1)*filter_bytes; + #define STBI__CASE(f) \ + case f: \ + for (k=0; k < nk; ++k) + switch (filter) { + // "none" filter turns into a memcpy here; make that explicit. 
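+               // (editor's note: STBI__CASE expands to "case f: for (k=0; k < nk; ++k)",
+               // so every branch below is the same per-byte loop over the scanline
+               // with only the PNG filter arithmetic differing, e.g. F_sub adds the
+               // byte filter_bytes to the left: raw[k] + cur[k-filter_bytes])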
+               case STBI__F_none:       memcpy(cur, raw, nk); break;
+               STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
+               STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
+               STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
+               STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
+               STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
+               STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
+            }
+            #undef STBI__CASE
+            raw += nk;
+         } else {
+            STBI_ASSERT(img_n+1 == out_n);
+            #define STBI__CASE(f) \
+                case f: \
+                   for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
+                      for (k=0; k < filter_bytes; ++k)
+            switch (filter) {
+               STBI__CASE(STBI__F_none)         { cur[k] = raw[k]; } break;
+               STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
+               STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
+               STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
+               STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
+               STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
+               STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
+            }
+            #undef STBI__CASE
+
+            // the loop above sets the high byte of the pixels' alpha, but for
+            // 16 bit png files we also need the low byte set. we'll do that here.
+            if (depth == 16) {
+               cur = a->out + stride*j; // start at the beginning of the row again
+               for (i=0; i < x; ++i,cur+=output_bytes) {
+                  cur[filter_bytes+1] = 255;
+               }
+            }
+         }
+      }
+
+      // we make a separate pass to expand bits to pixels; for performance,
+      // this could run two scanlines behind the above code, so it won't
+      // interfere with filtering but will still be in the cache.
+      if (depth < 8) {
+         for (j=0; j < y; ++j) {
+            stbi_uc *cur = a->out + stride*j;
+            stbi_uc *in  = a->out + stride*j + x*out_n - img_width_bytes;
+            // unpack 1/2/4-bit into an 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
+            // PNG guarantees byte alignment; if width is not a multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
+            stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
+
+            // note that the final byte might overshoot and write more data than desired.
+            // we can allocate enough data that this never writes out of memory, but it
+            // could also overwrite the next scanline. can it overwrite non-empty data
+            // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
+ // so we need to explicitly clamp the final ones + + if (depth == 4) { + for (k=x*img_n; k >= 2; k-=2, ++in) { + *cur++ = scale * ((*in >> 4) ); + *cur++ = scale * ((*in ) & 0x0f); + } + if (k > 0) *cur++ = scale * ((*in >> 4) ); + } else if (depth == 2) { + for (k=x*img_n; k >= 4; k-=4, ++in) { + *cur++ = scale * ((*in >> 6) ); + *cur++ = scale * ((*in >> 4) & 0x03); + *cur++ = scale * ((*in >> 2) & 0x03); + *cur++ = scale * ((*in ) & 0x03); + } + if (k > 0) *cur++ = scale * ((*in >> 6) ); + if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); + if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); + } else if (depth == 1) { + for (k=x*img_n; k >= 8; k-=8, ++in) { + *cur++ = scale * ((*in >> 7) ); + *cur++ = scale * ((*in >> 6) & 0x01); + *cur++ = scale * ((*in >> 5) & 0x01); + *cur++ = scale * ((*in >> 4) & 0x01); + *cur++ = scale * ((*in >> 3) & 0x01); + *cur++ = scale * ((*in >> 2) & 0x01); + *cur++ = scale * ((*in >> 1) & 0x01); + *cur++ = scale * ((*in ) & 0x01); + } + if (k > 0) *cur++ = scale * ((*in >> 7) ); + if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); + if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); + if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); + if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); + if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); + if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); + } + if (img_n != out_n) { + int q; + // insert alpha = 255 + cur = a->out + stride*j; + if (img_n == 1) { + for (q=x-1; q >= 0; --q) { + cur[q*2+1] = 255; + cur[q*2+0] = cur[q]; + } + } else { + STBI_ASSERT(img_n == 3); + for (q=x-1; q >= 0; --q) { + cur[q*4+3] = 255; + cur[q*4+2] = cur[q*3+2]; + cur[q*4+1] = cur[q*3+1]; + cur[q*4+0] = cur[q*3+0]; + } + } + } + } + } else if (depth == 16) { + // force the image data from big-endian to platform-native. + // this is done in a separate pass due to the decoding relying + // on the data being untouched, but could probably be done + // per-line during decode if care is taken. + stbi_uc *cur = a->out; + stbi__uint16 *cur16 = (stbi__uint16*)cur; + + for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { + *cur16 = (cur[0] << 8) | cur[1]; + } + } + + return 1; +} + +static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced) +{ + int bytes = (depth == 16 ? 
2 : 1); + int out_bytes = out_n * bytes; + stbi_uc *final; + int p; + if (!interlaced) + return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color); + + // de-interlacing + final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0); + if (!final) return stbi__err("outofmem", "Out of memory"); + for (p=0; p < 7; ++p) { + int xorig[] = { 0,4,0,2,0,1,0 }; + int yorig[] = { 0,0,4,0,2,0,1 }; + int xspc[] = { 8,8,4,4,2,2,1 }; + int yspc[] = { 8,8,8,4,4,2,2 }; + int i,j,x,y; + // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 + x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p]; + y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p]; + if (x && y) { + stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; + if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) { + STBI_FREE(final); + return 0; + } + for (j=0; j < y; ++j) { + for (i=0; i < x; ++i) { + int out_y = j*yspc[p]+yorig[p]; + int out_x = i*xspc[p]+xorig[p]; + memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes, + a->out + (j*x+i)*out_bytes, out_bytes); + } + } + STBI_FREE(a->out); + image_data += img_len; + image_data_len -= img_len; + } + } + a->out = final; + + return 1; +} + +static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + // compute color-based transparency, assuming we've + // already got 255 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i=0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 0 : 255); + p += 2; + } + } else { + for (i=0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi__uint16 *p = (stbi__uint16*) z->out; + + // compute color-based transparency, assuming we've + // already got 65535 as the alpha value in the output + STBI_ASSERT(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i = 0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 
0 : 65535);
+         p += 2;
+      }
+   } else {
+      for (i = 0; i < pixel_count; ++i) {
+         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
+            p[3] = 0;
+         p += 4;
+      }
+   }
+   return 1;
+}
+
+static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
+{
+   stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
+   stbi_uc *p, *temp_out, *orig = a->out;
+
+   p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
+   if (p == NULL) return stbi__err("outofmem", "Out of memory");
+
+   // between here and free(out) below, exiting would leak
+   temp_out = p;
+
+   if (pal_img_n == 3) {
+      for (i=0; i < pixel_count; ++i) {
+         int n = orig[i]*4;
+         p[0] = palette[n  ];
+         p[1] = palette[n+1];
+         p[2] = palette[n+2];
+         p += 3;
+      }
+   } else {
+      for (i=0; i < pixel_count; ++i) {
+         int n = orig[i]*4;
+         p[0] = palette[n  ];
+         p[1] = palette[n+1];
+         p[2] = palette[n+2];
+         p[3] = palette[n+3];
+         p += 4;
+      }
+   }
+   STBI_FREE(a->out);
+   a->out = temp_out;
+
+   STBI_NOTUSED(len);
+
+   return 1;
+}
+
+static int stbi__unpremultiply_on_load_global = 0;
+static int stbi__de_iphone_flag_global = 0;
+
+STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
+{
+   stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply;
+}
+
+STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
+{
+   stbi__de_iphone_flag_global = flag_true_if_should_convert;
+}
+
+#ifndef STBI_THREAD_LOCAL
+#define stbi__unpremultiply_on_load  stbi__unpremultiply_on_load_global
+#define stbi__de_iphone_flag  stbi__de_iphone_flag_global
+#else
+static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set;
+static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set;
+
+STBIDEF void stbi__unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply)
+{
+   stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply;
+   stbi__unpremultiply_on_load_set = 1;
+}
+
+STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert)
+{
+   stbi__de_iphone_flag_local = flag_true_if_should_convert;
+   stbi__de_iphone_flag_set = 1;
+}
+
+#define stbi__unpremultiply_on_load  (stbi__unpremultiply_on_load_set           \
+                                       ? stbi__unpremultiply_on_load_local      \
+                                       : stbi__unpremultiply_on_load_global)
+#define stbi__de_iphone_flag  (stbi__de_iphone_flag_set                         \
+                                       ?
stbi__de_iphone_flag_local \ + : stbi__de_iphone_flag_global) +#endif // STBI_THREAD_LOCAL + +static void stbi__de_iphone(stbi__png *z) +{ + stbi__context *s = z->s; + stbi__uint32 i, pixel_count = s->img_x * s->img_y; + stbi_uc *p = z->out; + + if (s->img_out_n == 3) { // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 3; + } + } else { + STBI_ASSERT(s->img_out_n == 4); + if (stbi__unpremultiply_on_load) { + // convert bgr to rgb and unpremultiply + for (i=0; i < pixel_count; ++i) { + stbi_uc a = p[3]; + stbi_uc t = p[0]; + if (a) { + stbi_uc half = a / 2; + p[0] = (p[2] * 255 + half) / a; + p[1] = (p[1] * 255 + half) / a; + p[2] = ( t * 255 + half) / a; + } else { + p[0] = p[2]; + p[2] = t; + } + p += 4; + } + } else { + // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + stbi_uc t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 4; + } + } + } +} + +#define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) + +static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) +{ + stbi_uc palette[1024], pal_img_n=0; + stbi_uc has_trans=0, tc[3]={0}; + stbi__uint16 tc16[3]; + stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0; + int first=1,k,interlace=0, color=0, is_iphone=0; + stbi__context *s = z->s; + + z->expanded = NULL; + z->idata = NULL; + z->out = NULL; + + if (!stbi__check_png_header(s)) return 0; + + if (scan == STBI__SCAN_type) return 1; + + for (;;) { + stbi__pngchunk c = stbi__get_chunk_header(s); + switch (c.type) { + case STBI__PNG_TYPE('C','g','B','I'): + is_iphone = 1; + stbi__skip(s, c.length); + break; + case STBI__PNG_TYPE('I','H','D','R'): { + int comp,filter; + if (!first) return stbi__err("multiple IHDR","Corrupt PNG"); + first = 0; + if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG"); + s->img_x = stbi__get32be(s); + s->img_y = stbi__get32be(s); + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only"); + color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); + comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG"); + filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG"); + interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG"); + if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG"); + if (!pal_img_n) { + s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); + if (scan == STBI__SCAN_header) return 1; + } else { + // if paletted, then pal_n is our final components, and + // img_n is # components to decompress/filter. 
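+            // (e.g. an 8-bit paletted PNG is inflated and filtered at one byte
+            // per pixel -- the palette index -- and only expanded to pal_img_n
+            // bytes per pixel by stbi__expand_png_palette after IEND)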
+ s->img_n = 1; + if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); + // if SCAN_header, have to scan to see if we have a tRNS + } + break; + } + + case STBI__PNG_TYPE('P','L','T','E'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG"); + pal_len = c.length / 3; + if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG"); + for (i=0; i < pal_len; ++i) { + palette[i*4+0] = stbi__get8(s); + palette[i*4+1] = stbi__get8(s); + palette[i*4+2] = stbi__get8(s); + palette[i*4+3] = 255; + } + break; + } + + case STBI__PNG_TYPE('t','R','N','S'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG"); + if (pal_img_n) { + if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; } + if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG"); + if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG"); + pal_img_n = 4; + for (i=0; i < c.length; ++i) + palette[i*4+3] = stbi__get8(s); + } else { + if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); + if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); + has_trans = 1; + if (z->depth == 16) { + for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is + } else { + for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger + } + } + break; + } + + case STBI__PNG_TYPE('I','D','A','T'): { + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG"); + if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; } + if ((int)(ioff + c.length) < (int)ioff) return 0; + if (ioff + c.length > idata_limit) { + stbi__uint32 idata_limit_old = idata_limit; + stbi_uc *p; + if (idata_limit == 0) idata_limit = c.length > 4096 ? 
c.length : 4096; + while (ioff + c.length > idata_limit) + idata_limit *= 2; + STBI_NOTUSED(idata_limit_old); + p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory"); + z->idata = p; + } + if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG"); + ioff += c.length; + break; + } + + case STBI__PNG_TYPE('I','E','N','D'): { + stbi__uint32 raw_len, bpl; + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if (scan != STBI__SCAN_load) return 1; + if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG"); + // initial guess for decoded data size to avoid unnecessary reallocs + bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component + raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */; + z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); + if (z->expanded == NULL) return 0; // zlib should set error + STBI_FREE(z->idata); z->idata = NULL; + if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) + s->img_out_n = s->img_n+1; + else + s->img_out_n = s->img_n; + if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0; + if (has_trans) { + if (z->depth == 16) { + if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0; + } else { + if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0; + } + } + if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2) + stbi__de_iphone(z); + if (pal_img_n) { + // pal_img_n == 3 or 4 + s->img_n = pal_img_n; // record the actual colors we had + s->img_out_n = pal_img_n; + if (req_comp >= 3) s->img_out_n = req_comp; + if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n)) + return 0; + } else if (has_trans) { + // non-paletted image with tRNS -> source image has (constant) alpha + ++s->img_n; + } + STBI_FREE(z->expanded); z->expanded = NULL; + // end of PNG chunk, read and skip CRC + stbi__get32be(s); + return 1; + } + + default: + // if critical, fail + if (first) return stbi__err("first not IHDR", "Corrupt PNG"); + if ((c.type & (1 << 29)) == 0) { + #ifndef STBI_NO_FAILURE_STRINGS + // not threadsafe + static char invalid_chunk[] = "XXXX PNG chunk not known"; + invalid_chunk[0] = STBI__BYTECAST(c.type >> 24); + invalid_chunk[1] = STBI__BYTECAST(c.type >> 16); + invalid_chunk[2] = STBI__BYTECAST(c.type >> 8); + invalid_chunk[3] = STBI__BYTECAST(c.type >> 0); + #endif + return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type"); + } + stbi__skip(s, c.length); + break; + } + // end of PNG chunk, read and skip CRC + stbi__get32be(s); + } +} + +static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri) +{ + void *result=NULL; + if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); + if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) { + if (p->depth <= 8) + ri->bits_per_channel = 8; + else if (p->depth == 16) + ri->bits_per_channel = 16; + else + return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth"); + result = p->out; + p->out = NULL; + if (req_comp && req_comp != p->s->img_out_n) { + if (ri->bits_per_channel == 8) + result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + else + result = stbi__convert_format16((stbi__uint16 
*) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
+         p->s->img_out_n = req_comp;
+         if (result == NULL) return result;
+      }
+      *x = p->s->img_x;
+      *y = p->s->img_y;
+      if (n) *n = p->s->img_n;
+   }
+   STBI_FREE(p->out);      p->out      = NULL;
+   STBI_FREE(p->expanded); p->expanded = NULL;
+   STBI_FREE(p->idata);    p->idata    = NULL;
+
+   return result;
+}
+
+static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
+{
+   stbi__png p;
+   p.s = s;
+   return stbi__do_png(&p, x,y,comp,req_comp, ri);
+}
+
+static int stbi__png_test(stbi__context *s)
+{
+   int r;
+   r = stbi__check_png_header(s);
+   stbi__rewind(s);
+   return r;
+}
+
+static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
+{
+   if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
+      stbi__rewind( p->s );
+      return 0;
+   }
+   if (x) *x = p->s->img_x;
+   if (y) *y = p->s->img_y;
+   if (comp) *comp = p->s->img_n;
+   return 1;
+}
+
+static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   stbi__png p;
+   p.s = s;
+   return stbi__png_info_raw(&p, x, y, comp);
+}
+
+static int stbi__png_is16(stbi__context *s)
+{
+   stbi__png p;
+   p.s = s;
+   if (!stbi__png_info_raw(&p, NULL, NULL, NULL))
+      return 0;
+   if (p.depth != 16) {
+      stbi__rewind(p.s);
+      return 0;
+   }
+   return 1;
+}
+#endif
+
+// Microsoft/Windows BMP image
+
+#ifndef STBI_NO_BMP
+static int stbi__bmp_test_raw(stbi__context *s)
+{
+   int r;
+   int sz;
+   if (stbi__get8(s) != 'B') return 0;
+   if (stbi__get8(s) != 'M') return 0;
+   stbi__get32le(s); // discard filesize
+   stbi__get16le(s); // discard reserved
+   stbi__get16le(s); // discard reserved
+   stbi__get32le(s); // discard data offset
+   sz = stbi__get32le(s);
+   r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
+   return r;
+}
+
+static int stbi__bmp_test(stbi__context *s)
+{
+   int r = stbi__bmp_test_raw(s);
+   stbi__rewind(s);
+   return r;
+}
+
+
+// returns 0..31 for the highest set bit
+static int stbi__high_bit(unsigned int z)
+{
+   int n=0;
+   if (z == 0) return -1;
+   if (z >= 0x10000) { n += 16; z >>= 16; }
+   if (z >= 0x00100) { n += 8; z >>= 8; }
+   if (z >= 0x00010) { n += 4; z >>= 4; }
+   if (z >= 0x00004) { n += 2; z >>= 2; }
+   if (z >= 0x00002) { n += 1;/* >>= 1;*/ }
+   return n;
+}
+
+static int stbi__bitcount(unsigned int a)
+{
+   a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2
+   a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4
+   a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits
+   a = (a + (a >> 8)); // max 16 per 8 bits
+   a = (a + (a >> 16)); // max 32 per 8 bits
+   return a & 0xff;
+}
+
+// extract an arbitrarily-aligned N-bit value (N=bits)
+// from v, and then make it 8-bits long and fractionally
+// extend it to the full range.
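+//
+// (worked example, added for clarity: for a 5-bit BMP channel, bits=5 and
+// v=0b10110 (22) should map to roughly 22*255/31; the mul/shift tables
+// below compute the bit replication (v<<3)|(v>>2) = 0b10110101 = 181 with
+// one multiply and one shift, and the endpoints stay exact: 0 -> 0 and
+// 31 -> 255.)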
+static int stbi__shiftsigned(unsigned int v, int shift, int bits) +{ + static unsigned int mul_table[9] = { + 0, + 0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/, + 0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/, + }; + static unsigned int shift_table[9] = { + 0, 0,0,1,0,2,4,6,0, + }; + if (shift < 0) + v <<= -shift; + else + v >>= shift; + STBI_ASSERT(v < 256); + v >>= (8-bits); + STBI_ASSERT(bits >= 0 && bits <= 8); + return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits]; +} + +typedef struct +{ + int bpp, offset, hsz; + unsigned int mr,mg,mb,ma, all_a; + int extra_read; +} stbi__bmp_data; + +static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress) +{ + // BI_BITFIELDS specifies masks explicitly, don't override + if (compress == 3) + return 1; + + if (compress == 0) { + if (info->bpp == 16) { + info->mr = 31u << 10; + info->mg = 31u << 5; + info->mb = 31u << 0; + } else if (info->bpp == 32) { + info->mr = 0xffu << 16; + info->mg = 0xffu << 8; + info->mb = 0xffu << 0; + info->ma = 0xffu << 24; + info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0 + } else { + // otherwise, use defaults, which is all-0 + info->mr = info->mg = info->mb = info->ma = 0; + } + return 1; + } + return 0; // error +} + +static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) +{ + int hsz; + if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP"); + stbi__get32le(s); // discard filesize + stbi__get16le(s); // discard reserved + stbi__get16le(s); // discard reserved + info->offset = stbi__get32le(s); + info->hsz = hsz = stbi__get32le(s); + info->mr = info->mg = info->mb = info->ma = 0; + info->extra_read = 14; + + if (info->offset < 0) return stbi__errpuc("bad BMP", "bad BMP"); + + if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown"); + if (hsz == 12) { + s->img_x = stbi__get16le(s); + s->img_y = stbi__get16le(s); + } else { + s->img_x = stbi__get32le(s); + s->img_y = stbi__get32le(s); + } + if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); + info->bpp = stbi__get16le(s); + if (hsz != 12) { + int compress = stbi__get32le(s); + if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE"); + if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes + if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel + stbi__get32le(s); // discard sizeof + stbi__get32le(s); // discard hres + stbi__get32le(s); // discard vres + stbi__get32le(s); // discard colorsused + stbi__get32le(s); // discard max important + if (hsz == 40 || hsz == 56) { + if (hsz == 56) { + stbi__get32le(s); + stbi__get32le(s); + stbi__get32le(s); + stbi__get32le(s); + } + if (info->bpp == 16 || info->bpp == 32) { + if (compress == 0) { + stbi__bmp_set_mask_defaults(info, compress); + } else if (compress == 3) { + info->mr = stbi__get32le(s); + info->mg = stbi__get32le(s); + info->mb = stbi__get32le(s); + info->extra_read += 12; + // not documented, but generated by photoshop and handled by mspaint + if (info->mr == info->mg && info->mg == info->mb) { + // ?!?!? 
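+                  // (identical R/G/B bitfield masks give no way to tell the
+                  // three channels apart, so reject rather than guess)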
+ return stbi__errpuc("bad BMP", "bad BMP"); + } + } else + return stbi__errpuc("bad BMP", "bad BMP"); + } + } else { + // V4/V5 header + int i; + if (hsz != 108 && hsz != 124) + return stbi__errpuc("bad BMP", "bad BMP"); + info->mr = stbi__get32le(s); + info->mg = stbi__get32le(s); + info->mb = stbi__get32le(s); + info->ma = stbi__get32le(s); + if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs + stbi__bmp_set_mask_defaults(info, compress); + stbi__get32le(s); // discard color space + for (i=0; i < 12; ++i) + stbi__get32le(s); // discard color space parameters + if (hsz == 124) { + stbi__get32le(s); // discard rendering intent + stbi__get32le(s); // discard offset of profile data + stbi__get32le(s); // discard size of profile data + stbi__get32le(s); // discard reserved + } + } + } + return (void *) 1; +} + + +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + unsigned int mr=0,mg=0,mb=0,ma=0, all_a; + stbi_uc pal[256][4]; + int psize=0,i,j,width; + int flip_vertically, pad, target; + stbi__bmp_data info; + STBI_NOTUSED(ri); + + info.all_a = 255; + if (stbi__bmp_parse_header(s, &info) == NULL) + return NULL; // error code already set + + flip_vertically = ((int) s->img_y) > 0; + s->img_y = abs((int) s->img_y); + + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + + mr = info.mr; + mg = info.mg; + mb = info.mb; + ma = info.ma; + all_a = info.all_a; + + if (info.hsz == 12) { + if (info.bpp < 24) + psize = (info.offset - info.extra_read - 24) / 3; + } else { + if (info.bpp < 16) + psize = (info.offset - info.extra_read - info.hsz) >> 2; + } + if (psize == 0) { + if (info.offset != s->callback_already_read + (s->img_buffer - s->img_buffer_original)) { + return stbi__errpuc("bad offset", "Corrupt BMP"); + } + } + + if (info.bpp == 24 && ma == 0xff000000) + s->img_n = 3; + else + s->img_n = ma ? 4 : 3; + if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 + target = req_comp; + else + target = s->img_n; // if they want monochrome, we'll post-convert + + // sanity-check size + if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0)) + return stbi__errpuc("too large", "Corrupt BMP"); + + out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + if (info.bpp < 16) { + int z=0; + if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); } + for (i=0; i < psize; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + if (info.hsz != 12) stbi__get8(s); + pal[i][3] = 255; + } + stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 
3 : 4)); + if (info.bpp == 1) width = (s->img_x + 7) >> 3; + else if (info.bpp == 4) width = (s->img_x + 1) >> 1; + else if (info.bpp == 8) width = s->img_x; + else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); } + pad = (-width)&3; + if (info.bpp == 1) { + for (j=0; j < (int) s->img_y; ++j) { + int bit_offset = 7, v = stbi__get8(s); + for (i=0; i < (int) s->img_x; ++i) { + int color = (v>>bit_offset)&0x1; + out[z++] = pal[color][0]; + out[z++] = pal[color][1]; + out[z++] = pal[color][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + if((--bit_offset) < 0) { + bit_offset = 7; + v = stbi__get8(s); + } + } + stbi__skip(s, pad); + } + } else { + for (j=0; j < (int) s->img_y; ++j) { + for (i=0; i < (int) s->img_x; i += 2) { + int v=stbi__get8(s),v2=0; + if (info.bpp == 4) { + v2 = v & 15; + v >>= 4; + } + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + v = (info.bpp == 8) ? stbi__get8(s) : v2; + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + } + stbi__skip(s, pad); + } + } + } else { + int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; + int z = 0; + int easy=0; + stbi__skip(s, info.offset - info.extra_read - info.hsz); + if (info.bpp == 24) width = 3 * s->img_x; + else if (info.bpp == 16) width = 2*s->img_x; + else /* bpp = 32 and pad = 0 */ width=0; + pad = (-width) & 3; + if (info.bpp == 24) { + easy = 1; + } else if (info.bpp == 32) { + if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000) + easy = 2; + } + if (!easy) { + if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } + // right shift amt to put high bit in position #7 + rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr); + gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg); + bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb); + ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma); + if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } + } + for (j=0; j < (int) s->img_y; ++j) { + if (easy) { + for (i=0; i < (int) s->img_x; ++i) { + unsigned char a; + out[z+2] = stbi__get8(s); + out[z+1] = stbi__get8(s); + out[z+0] = stbi__get8(s); + z += 3; + a = (easy == 2 ? stbi__get8(s) : 255); + all_a |= a; + if (target == 4) out[z++] = a; + } + } else { + int bpp = info.bpp; + for (i=0; i < (int) s->img_x; ++i) { + stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s)); + unsigned int a; + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount)); + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount)); + out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount)); + a = (ma ? 
stbi__shiftsigned(v & ma, ashift, acount) : 255); + all_a |= a; + if (target == 4) out[z++] = STBI__BYTECAST(a); + } + } + stbi__skip(s, pad); + } + } + + // if alpha channel is all 0s, replace with all 255s + if (target == 4 && all_a == 0) + for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4) + out[i] = 255; + + if (flip_vertically) { + stbi_uc t; + for (j=0; j < (int) s->img_y>>1; ++j) { + stbi_uc *p1 = out + j *s->img_x*target; + stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; + for (i=0; i < (int) s->img_x*target; ++i) { + t = p1[i]; p1[i] = p2[i]; p2[i] = t; + } + } + } + + if (req_comp && req_comp != target) { + out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + return out; +} +#endif + +// Targa Truevision - TGA +// by Jonathan Dummer +#ifndef STBI_NO_TGA +// returns STBI_rgb or whatever, 0 on error +static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16) +{ + // only RGB or RGBA (incl. 16bit) or grey allowed + if (is_rgb16) *is_rgb16 = 0; + switch(bits_per_pixel) { + case 8: return STBI_grey; + case 16: if(is_grey) return STBI_grey_alpha; + // fallthrough + case 15: if(is_rgb16) *is_rgb16 = 1; + return STBI_rgb; + case 24: // fallthrough + case 32: return bits_per_pixel/8; + default: return 0; + } +} + +static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp) +{ + int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp; + int sz, tga_colormap_type; + stbi__get8(s); // discard Offset + tga_colormap_type = stbi__get8(s); // colormap type + if( tga_colormap_type > 1 ) { + stbi__rewind(s); + return 0; // only RGB or indexed allowed + } + tga_image_type = stbi__get8(s); // image type + if ( tga_colormap_type == 1 ) { // colormapped (paletted) image + if (tga_image_type != 1 && tga_image_type != 9) { + stbi__rewind(s); + return 0; + } + stbi__skip(s,4); // skip index of first colormap entry and number of entries + sz = stbi__get8(s); // check bits per palette color entry + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) { + stbi__rewind(s); + return 0; + } + stbi__skip(s,4); // skip image x and y origin + tga_colormap_bpp = sz; + } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE + if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) { + stbi__rewind(s); + return 0; // only RGB or grey allowed, +/- RLE + } + stbi__skip(s,9); // skip colormap specification and image x/y origin + tga_colormap_bpp = 0; + } + tga_w = stbi__get16le(s); + if( tga_w < 1 ) { + stbi__rewind(s); + return 0; // test width + } + tga_h = stbi__get16le(s); + if( tga_h < 1 ) { + stbi__rewind(s); + return 0; // test height + } + tga_bits_per_pixel = stbi__get8(s); // bits per pixel + stbi__get8(s); // ignore alpha bits + if (tga_colormap_bpp != 0) { + if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) { + // when using a colormap, tga_bits_per_pixel is the size of the indexes + // I don't think anything but 8 or 16bit indexes makes sense + stbi__rewind(s); + return 0; + } + tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL); + } else { + tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL); + } + if(!tga_comp) { + stbi__rewind(s); + return 0; + } + if (x) *x = tga_w; + if (y) *y = tga_h; + if (comp) *comp = tga_comp; + return 1; 
// seems to have passed everything +} + +static int stbi__tga_test(stbi__context *s) +{ + int res = 0; + int sz, tga_color_type; + stbi__get8(s); // discard Offset + tga_color_type = stbi__get8(s); // color type + if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed + sz = stbi__get8(s); // image type + if ( tga_color_type == 1 ) { // colormapped (paletted) image + if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9 + stbi__skip(s,4); // skip index of first colormap entry and number of entries + sz = stbi__get8(s); // check bits per palette color entry + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + stbi__skip(s,4); // skip image x and y origin + } else { // "normal" image w/o colormap + if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE + stbi__skip(s,9); // skip colormap specification and image x/y origin + } + if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width + if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height + sz = stbi__get8(s); // bits per pixel + if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index + if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; + + res = 1; // if we got this far, everything's good and we can return 1 instead of 0 + +errorEnd: + stbi__rewind(s); + return res; +} + +// read 16bit value and convert to 24bit RGB +static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out) +{ + stbi__uint16 px = (stbi__uint16)stbi__get16le(s); + stbi__uint16 fiveBitMask = 31; + // we have 3 channels with 5bits each + int r = (px >> 10) & fiveBitMask; + int g = (px >> 5) & fiveBitMask; + int b = px & fiveBitMask; + // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later + out[0] = (stbi_uc)((r * 255)/31); + out[1] = (stbi_uc)((g * 255)/31); + out[2] = (stbi_uc)((b * 255)/31); + + // some people claim that the most significant bit might be used for alpha + // (possibly if an alpha-bit is set in the "image descriptor byte") + // but that only made 16bit test images completely translucent.. + // so let's treat all 15 and 16bit TGAs as RGB with no alpha. +} + +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + // read in the TGA header stuff + int tga_offset = stbi__get8(s); + int tga_indexed = stbi__get8(s); + int tga_image_type = stbi__get8(s); + int tga_is_RLE = 0; + int tga_palette_start = stbi__get16le(s); + int tga_palette_len = stbi__get16le(s); + int tga_palette_bits = stbi__get8(s); + int tga_x_origin = stbi__get16le(s); + int tga_y_origin = stbi__get16le(s); + int tga_width = stbi__get16le(s); + int tga_height = stbi__get16le(s); + int tga_bits_per_pixel = stbi__get8(s); + int tga_comp, tga_rgb16=0; + int tga_inverted = stbi__get8(s); + // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?) 
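+   // bit 5 of that same descriptor byte is the screen-origin flag
+   // (1 = top-left); it is folded into tga_inverted below, so
+   // tga_inverted==1 means a bottom-up file whose rows get flipped on load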
+   // image data
+   unsigned char *tga_data;
+   unsigned char *tga_palette = NULL;
+   int i, j;
+   unsigned char raw_data[4] = {0};
+   int RLE_count = 0;
+   int RLE_repeating = 0;
+   int read_next_pixel = 1;
+   STBI_NOTUSED(ri);
+   STBI_NOTUSED(tga_x_origin); // @TODO
+   STBI_NOTUSED(tga_y_origin); // @TODO
+
+   if (tga_height > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+   if (tga_width > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+
+   // do a tiny bit of preprocessing
+   if ( tga_image_type >= 8 )
+   {
+      tga_image_type -= 8;
+      tga_is_RLE = 1;
+   }
+   tga_inverted = 1 - ((tga_inverted >> 5) & 1);
+
+   // If I'm paletted, then I'll use the number of bits from the palette
+   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
+   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
+
+   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
+      return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
+
+   // tga info
+   *x = tga_width;
+   *y = tga_height;
+   if (comp) *comp = tga_comp;
+
+   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
+      return stbi__errpuc("too large", "Corrupt TGA");
+
+   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
+   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
+
+   // skip to the data's starting position (offset usually = 0)
+   stbi__skip(s, tga_offset );
+
+   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
+      for (i=0; i < tga_height; ++i) {
+         int row = tga_inverted ? tga_height -i - 1 : i;
+         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
+         stbi__getn(s, tga_row, tga_width * tga_comp);
+      }
+   } else  {
+      // do I need to load a palette?
+      if ( tga_indexed)
+      {
+         if (tga_palette_len == 0) {  /* you have to have at least one entry! */
+            STBI_FREE(tga_data);
+            return stbi__errpuc("bad palette", "Corrupt TGA");
+         }
+
+         // any data to skip? (offset usually = 0)
+         stbi__skip(s, tga_palette_start );
+         // load the palette
+         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
+         if (!tga_palette) {
+            STBI_FREE(tga_data);
+            return stbi__errpuc("outofmem", "Out of memory");
+         }
+         if (tga_rgb16) {
+            stbi_uc *pal_entry = tga_palette;
+            STBI_ASSERT(tga_comp == STBI_rgb);
+            for (i=0; i < tga_palette_len; ++i) {
+               stbi__tga_read_rgb16(s, pal_entry);
+               pal_entry += tga_comp;
+            }
+         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
+            STBI_FREE(tga_data);
+            STBI_FREE(tga_palette);
+            return stbi__errpuc("bad palette", "Corrupt TGA");
+         }
+      }
+      // load the data
+      for (i=0; i < tga_width * tga_height; ++i)
+      {
+         // if I'm in RLE mode, do I need to get an RLE packet?
+         if ( tga_is_RLE )
+         {
+            if ( RLE_count == 0 )
+            {
+               // yep, get the next byte as an RLE command
+               int RLE_cmd = stbi__get8(s);
+               RLE_count = 1 + (RLE_cmd & 127);
+               RLE_repeating = RLE_cmd >> 7;
+               read_next_pixel = 1;
+            } else if ( !RLE_repeating )
+            {
+               read_next_pixel = 1;
+            }
+         } else
+         {
+            read_next_pixel = 1;
+         }
+         // OK, if I need to read a pixel, do it now
+         if ( read_next_pixel )
+         {
+            // load however much data we did have
+            if ( tga_indexed )
+            {
+               // read in index, then perform the lookup
+               int pal_idx = (tga_bits_per_pixel == 8) ?
stbi__get8(s) : stbi__get16le(s); + if ( pal_idx >= tga_palette_len ) { + // invalid index + pal_idx = 0; + } + pal_idx *= tga_comp; + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = tga_palette[pal_idx+j]; + } + } else if(tga_rgb16) { + STBI_ASSERT(tga_comp == STBI_rgb); + stbi__tga_read_rgb16(s, raw_data); + } else { + // read in the data raw + for (j = 0; j < tga_comp; ++j) { + raw_data[j] = stbi__get8(s); + } + } + // clear the reading flag for the next pixel + read_next_pixel = 0; + } // end of reading a pixel + + // copy data + for (j = 0; j < tga_comp; ++j) + tga_data[i*tga_comp+j] = raw_data[j]; + + // in case we're in RLE mode, keep counting down + --RLE_count; + } + // do I need to invert the image? + if ( tga_inverted ) + { + for (j = 0; j*2 < tga_height; ++j) + { + int index1 = j * tga_width * tga_comp; + int index2 = (tga_height - 1 - j) * tga_width * tga_comp; + for (i = tga_width * tga_comp; i > 0; --i) + { + unsigned char temp = tga_data[index1]; + tga_data[index1] = tga_data[index2]; + tga_data[index2] = temp; + ++index1; + ++index2; + } + } + } + // clear my palette, if I had one + if ( tga_palette != NULL ) + { + STBI_FREE( tga_palette ); + } + } + + // swap RGB - if the source data was RGB16, it already is in the right order + if (tga_comp >= 3 && !tga_rgb16) + { + unsigned char* tga_pixel = tga_data; + for (i=0; i < tga_width * tga_height; ++i) + { + unsigned char temp = tga_pixel[0]; + tga_pixel[0] = tga_pixel[2]; + tga_pixel[2] = temp; + tga_pixel += tga_comp; + } + } + + // convert to target component count + if (req_comp && req_comp != tga_comp) + tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height); + + // the things I do to get rid of an error message, and yet keep + // Microsoft's C compilers happy... [8^( + tga_palette_start = tga_palette_len = tga_palette_bits = + tga_x_origin = tga_y_origin = 0; + STBI_NOTUSED(tga_palette_start); + // OK, done + return tga_data; +} +#endif + +// ************************************************************************************************* +// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB + +#ifndef STBI_NO_PSD +static int stbi__psd_test(stbi__context *s) +{ + int r = (stbi__get32be(s) == 0x38425053); + stbi__rewind(s); + return r; +} + +static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount) +{ + int count, nleft, len; + + count = 0; + while ((nleft = pixelCount - count) > 0) { + len = stbi__get8(s); + if (len == 128) { + // No-op. + } else if (len < 128) { + // Copy next len+1 bytes literally. + len++; + if (len > nleft) return 0; // corrupt data + count += len; + while (len) { + *p = stbi__get8(s); + p += 4; + len--; + } + } else if (len > 128) { + stbi_uc val; + // Next -len+1 bytes in the dest are replicated from next source byte. + // (Interpret len as a negative 8-bit int.) + len = 257 - len; + if (len > nleft) return 0; // corrupt data + val = stbi__get8(s); + count += len; + while (len) { + *p = val; + p += 4; + len--; + } + } + } + + return 1; +} + +static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ + int pixelCount; + int channelCount, compression; + int channel, i; + int bitdepth; + int w,h; + stbi_uc *out; + STBI_NOTUSED(ri); + + // Check identifier + if (stbi__get32be(s) != 0x38425053) // "8BPS" + return stbi__errpuc("not PSD", "Corrupt PSD image"); + + // Check file type version. 
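+   // (a version of 2 would indicate a PSB "large document" file, which
+   // this decoder does not attempt to handle)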
+ if (stbi__get16be(s) != 1) + return stbi__errpuc("wrong version", "Unsupported version of PSD image"); + + // Skip 6 reserved bytes. + stbi__skip(s, 6 ); + + // Read the number of channels (R, G, B, A, etc). + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) + return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image"); + + // Read the rows and columns of the image. + h = stbi__get32be(s); + w = stbi__get32be(s); + + if (h > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (w > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + + // Make sure the depth is 8 bits. + bitdepth = stbi__get16be(s); + if (bitdepth != 8 && bitdepth != 16) + return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit"); + + // Make sure the color mode is RGB. + // Valid options are: + // 0: Bitmap + // 1: Grayscale + // 2: Indexed color + // 3: RGB color + // 4: CMYK color + // 7: Multichannel + // 8: Duotone + // 9: Lab color + if (stbi__get16be(s) != 3) + return stbi__errpuc("wrong color format", "PSD is not in RGB color format"); + + // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) + stbi__skip(s,stbi__get32be(s) ); + + // Skip the image resources. (resolution, pen tool paths, etc) + stbi__skip(s, stbi__get32be(s) ); + + // Skip the reserved data. + stbi__skip(s, stbi__get32be(s) ); + + // Find out if the data is compressed. + // Known values: + // 0: no compression + // 1: RLE compressed + compression = stbi__get16be(s); + if (compression > 1) + return stbi__errpuc("bad compression", "PSD has an unknown compression format"); + + // Check size + if (!stbi__mad3sizes_valid(4, w, h, 0)) + return stbi__errpuc("too large", "Corrupt PSD"); + + // Create the destination image. + + if (!compression && bitdepth == 16 && bpc == 16) { + out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0); + ri->bits_per_channel = 16; + } else + out = (stbi_uc *) stbi__malloc(4 * w*h); + + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + pixelCount = w*h; + + // Initialize the data to zero. + //memset( out, 0, pixelCount * 4 ); + + // Finally, the image data. + if (compression) { + // RLE as used by .PSD and .TIFF + // Loop until you get the number of unpacked bytes you are expecting: + // Read the next source byte into n. + // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. + // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. + // Else if n is 128, noop. + // Endloop + + // The RLE-compressed data is preceded by a 2-byte data count for each row in the data, + // which we're going to just skip. + stbi__skip(s, h * channelCount * 2 ); + + // Read the RLE data by channel. + for (channel = 0; channel < 4; channel++) { + stbi_uc *p; + + p = out+channel; + if (channel >= channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++, p += 4) + *p = (channel == 3 ? 255 : 0); + } else { + // Read the RLE data. + if (!stbi__psd_decode_rle(s, p, pixelCount)) { + STBI_FREE(out); + return stbi__errpuc("corrupt", "bad RLE data"); + } + } + } + + } else { + // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) + // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image. + + // Read the data by channel. 
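+      // PSD image data is planar -- every Red sample, then every Green, and
+      // so on -- so each pass below reads one source plane and scatters it
+      // into the interleaved RGBA output with a stride of 4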
+ for (channel = 0; channel < 4; channel++) { + if (channel >= channelCount) { + // Fill this channel with default data. + if (bitdepth == 16 && bpc == 16) { + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + stbi__uint16 val = channel == 3 ? 65535 : 0; + for (i = 0; i < pixelCount; i++, q += 4) + *q = val; + } else { + stbi_uc *p = out+channel; + stbi_uc val = channel == 3 ? 255 : 0; + for (i = 0; i < pixelCount; i++, p += 4) + *p = val; + } + } else { + if (ri->bits_per_channel == 16) { // output bpc + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + for (i = 0; i < pixelCount; i++, q += 4) + *q = (stbi__uint16) stbi__get16be(s); + } else { + stbi_uc *p = out+channel; + if (bitdepth == 16) { // input bpc + for (i = 0; i < pixelCount; i++, p += 4) + *p = (stbi_uc) (stbi__get16be(s) >> 8); + } else { + for (i = 0; i < pixelCount; i++, p += 4) + *p = stbi__get8(s); + } + } + } + } + } + + // remove weird white matte from PSD + if (channelCount >= 4) { + if (ri->bits_per_channel == 16) { + for (i=0; i < w*h; ++i) { + stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i; + if (pixel[3] != 0 && pixel[3] != 65535) { + float a = pixel[3] / 65535.0f; + float ra = 1.0f / a; + float inv_a = 65535.0f * (1 - ra); + pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a); + pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a); + pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a); + } + } + } else { + for (i=0; i < w*h; ++i) { + unsigned char *pixel = out + 4*i; + if (pixel[3] != 0 && pixel[3] != 255) { + float a = pixel[3] / 255.0f; + float ra = 1.0f / a; + float inv_a = 255.0f * (1 - ra); + pixel[0] = (unsigned char) (pixel[0]*ra + inv_a); + pixel[1] = (unsigned char) (pixel[1]*ra + inv_a); + pixel[2] = (unsigned char) (pixel[2]*ra + inv_a); + } + } + } + } + + // convert to desired output format + if (req_comp && req_comp != 4) { + if (ri->bits_per_channel == 16) + out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h); + else + out = stbi__convert_format(out, 4, req_comp, w, h); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + + if (comp) *comp = 4; + *y = h; + *x = w; + + return out; +} +#endif + +// ************************************************************************************************* +// Softimage PIC loader +// by Tom Seddon +// +// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format +// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/ + +#ifndef STBI_NO_PIC +static int stbi__pic_is4(stbi__context *s,const char *str) +{ + int i; + for (i=0; i<4; ++i) + if (stbi__get8(s) != (stbi_uc)str[i]) + return 0; + + return 1; +} + +static int stbi__pic_test_core(stbi__context *s) +{ + int i; + + if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) + return 0; + + for(i=0;i<84;++i) + stbi__get8(s); + + if (!stbi__pic_is4(s,"PICT")) + return 0; + + return 1; +} + +typedef struct +{ + stbi_uc size,type,channel; +} stbi__pic_packet; + +static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest) +{ + int mask=0x80, i; + + for (i=0; i<4; ++i, mask>>=1) { + if (channel & mask) { + if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short"); + dest[i]=stbi__get8(s); + } + } + + return dest; +} + +static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src) +{ + int mask=0x80,i; + + for (i=0;i<4; ++i, mask>>=1) + if (channel&mask) + dest[i]=src[i]; +} + +static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result) +{ + int 
act_comp=0,num_packets=0,y,chained;
+   stbi__pic_packet packets[10];
+
+   // this will (should...) cater for even some bizarre stuff like having data
+    // for the same channel in multiple packets.
+   do {
+      stbi__pic_packet *packet;
+
+      if (num_packets==sizeof(packets)/sizeof(packets[0]))
+         return stbi__errpuc("bad format","too many packets");
+
+      packet = &packets[num_packets++];
+
+      chained = stbi__get8(s);
+      packet->size    = stbi__get8(s);
+      packet->type    = stbi__get8(s);
+      packet->channel = stbi__get8(s);
+
+      act_comp |= packet->channel;
+
+      if (stbi__at_eof(s))    return stbi__errpuc("bad file","file too short (reading packets)");
+      if (packet->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp");
+   } while (chained);
+
+   *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
+
+   for(y=0; y<height; ++y) {
+      int packet_idx;
+
+      for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
+         stbi__pic_packet *packet = &packets[packet_idx];
+         stbi_uc *dest = result+y*width*4;
+
+         switch (packet->type) {
+            default:
+               return stbi__errpuc("bad format","packet has bad compression type");
+
+            case 0: {//uncompressed
+               int x;
+
+               for(x=0;x<width;++x, dest+=4)
+                  if (!stbi__readval(s,packet->channel,dest))
+                     return 0;
+               break;
+            }
+
+            case 1://Pure RLE
+               {
+                  int left=width, i;
+
+                  while (left>0) {
+                     stbi_uc count,value[4];
+
+                     count=stbi__get8(s);
+                     if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)");
+
+                     if (count > left)
+                        count = (stbi_uc) left;
+
+                     if (!stbi__readval(s,packet->channel,value))  return 0;
+
+                     for(i=0; i<count; ++i,dest+=4)
+                        stbi__copyval(packet->channel,dest,value);
+                     left -= count;
+                  }
+               }
+               break;
+
+            case 2: {//Mixed RLE
+               int left=width;
+               while (left>0) {
+                  int count = stbi__get8(s), i;
+                  if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)");
+
+                  if (count >= 128) { // Repeated
+                     stbi_uc value[4];
+
+                     if (count==128)
+                        count = stbi__get16be(s);
+                     else
+                        count -= 127;
+                     if (count > left)
+                        return stbi__errpuc("bad file","scanline overrun");
+
+                     if (!stbi__readval(s,packet->channel,value))
+                        return 0;
+
+                     for(i=0;i<count;++i, dest += 4)
+                        stbi__copyval(packet->channel,dest,value);
+                  } else { // Raw
+                     ++count;
+                     if (count>left) return stbi__errpuc("bad file","scanline overrun");
+
+                     for(i=0;i<count;++i, dest+=4)
+                        if (!stbi__readval(s,packet->channel,dest))
+                           return 0;
+                  }
+                  left-=count;
+               }
+               break;
+            }
+         }
+      }
+   }
+
+   return result;
+}
+
+static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
+{
+   stbi_uc *result;
+   int i, x,y, internal_comp;
+   STBI_NOTUSED(ri);
+
+   if (!comp) comp = &internal_comp;
+
+   for (i=0; i<92; ++i)
+      stbi__get8(s);
+
+   x = stbi__get16be(s);
+   y = stbi__get16be(s);
+
+   if (y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+   if (x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+
+   if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
+   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
+
+   stbi__get32be(s); //skip `ratio'
+   stbi__get16be(s); //skip `fields'
+   stbi__get16be(s); //skip `pad'
+
+   // intermediate buffer is RGBA
+   result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
+   if (!result) return stbi__errpuc("outofmem", "Out of memory");
+   memset(result, 0xff, x*y*4);
+
+   if (!stbi__pic_load_core(s,x,y,comp, result)) {
+      STBI_FREE(result);
+      result=0;
+   }
+   *px = x;
+   *py = y;
+   if (req_comp == 0) req_comp = *comp;
+   result=stbi__convert_format(result,4,req_comp,x,y);
+
+   return result;
+}
+
+static int stbi__pic_test(stbi__context *s)
+{
+   int r = stbi__pic_test_core(s);
+   stbi__rewind(s);
+   return r;
+}
+#endif
+
+// *************************************************************************************************
+// GIF loader -- public domain by
Jean-Marc Lienher -- simplified/shrunk by stb + +#ifndef STBI_NO_GIF +typedef struct +{ + stbi__int16 prefix; + stbi_uc first; + stbi_uc suffix; +} stbi__gif_lzw; + +typedef struct +{ + int w,h; + stbi_uc *out; // output buffer (always 4 components) + stbi_uc *background; // The current "background" as far as a gif is concerned + stbi_uc *history; + int flags, bgindex, ratio, transparent, eflags; + stbi_uc pal[256][4]; + stbi_uc lpal[256][4]; + stbi__gif_lzw codes[8192]; + stbi_uc *color_table; + int parse, step; + int lflags; + int start_x, start_y; + int max_x, max_y; + int cur_x, cur_y; + int line_size; + int delay; +} stbi__gif; + +static int stbi__gif_test_raw(stbi__context *s) +{ + int sz; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0; + sz = stbi__get8(s); + if (sz != '9' && sz != '7') return 0; + if (stbi__get8(s) != 'a') return 0; + return 1; +} + +static int stbi__gif_test(stbi__context *s) +{ + int r = stbi__gif_test_raw(s); + stbi__rewind(s); + return r; +} + +static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp) +{ + int i; + for (i=0; i < num_entries; ++i) { + pal[i][2] = stbi__get8(s); + pal[i][1] = stbi__get8(s); + pal[i][0] = stbi__get8(s); + pal[i][3] = transp == i ? 0 : 255; + } +} + +static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info) +{ + stbi_uc version; + if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') + return stbi__err("not GIF", "Corrupt GIF"); + + version = stbi__get8(s); + if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF"); + if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF"); + + stbi__g_failure_reason = ""; + g->w = stbi__get16le(s); + g->h = stbi__get16le(s); + g->flags = stbi__get8(s); + g->bgindex = stbi__get8(s); + g->ratio = stbi__get8(s); + g->transparent = -1; + + if (g->w > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + if (g->h > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + + if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments + + if (is_info) return 1; + + if (g->flags & 0x80) + stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1); + + return 1; +} + +static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp) +{ + stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif)); + if (!g) return stbi__err("outofmem", "Out of memory"); + if (!stbi__gif_header(s, g, comp, 1)) { + STBI_FREE(g); + stbi__rewind( s ); + return 0; + } + if (x) *x = g->w; + if (y) *y = g->h; + STBI_FREE(g); + return 1; +} + +static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code) +{ + stbi_uc *p, *c; + int idx; + + // recurse to decode the prefixes, since the linked-list is backwards, + // and working backwards through an interleaved image would be nasty + if (g->codes[code].prefix >= 0) + stbi__out_gif_code(g, g->codes[code].prefix); + + if (g->cur_y >= g->max_y) return; + + idx = g->cur_x + g->cur_y; + p = &g->out[idx]; + g->history[idx / 4] = 1; + + c = &g->color_table[g->codes[code].suffix * 4]; + if (c[3] > 128) { // don't render transparent pixels; + p[0] = c[2]; + p[1] = c[1]; + p[2] = c[0]; + p[3] = c[3]; + } + g->cur_x += 4; + + if (g->cur_x >= g->max_x) { + g->cur_x = g->start_x; + g->cur_y += g->step; + + while (g->cur_y >= g->max_y && g->parse > 0) { + g->step = (1 << 
g->parse) * g->line_size; + g->cur_y = g->start_y + (g->step >> 1); + --g->parse; + } + } +} + +static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g) +{ + stbi_uc lzw_cs; + stbi__int32 len, init_code; + stbi__uint32 first; + stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear; + stbi__gif_lzw *p; + + lzw_cs = stbi__get8(s); + if (lzw_cs > 12) return NULL; + clear = 1 << lzw_cs; + first = 1; + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + bits = 0; + valid_bits = 0; + for (init_code = 0; init_code < clear; init_code++) { + g->codes[init_code].prefix = -1; + g->codes[init_code].first = (stbi_uc) init_code; + g->codes[init_code].suffix = (stbi_uc) init_code; + } + + // support no starting clear code + avail = clear+2; + oldcode = -1; + + len = 0; + for(;;) { + if (valid_bits < codesize) { + if (len == 0) { + len = stbi__get8(s); // start new block + if (len == 0) + return g->out; + } + --len; + bits |= (stbi__int32) stbi__get8(s) << valid_bits; + valid_bits += 8; + } else { + stbi__int32 code = bits & codemask; + bits >>= codesize; + valid_bits -= codesize; + // @OPTIMIZE: is there some way we can accelerate the non-clear path? + if (code == clear) { // clear code + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + avail = clear + 2; + oldcode = -1; + first = 0; + } else if (code == clear + 1) { // end of stream code + stbi__skip(s, len); + while ((len = stbi__get8(s)) > 0) + stbi__skip(s,len); + return g->out; + } else if (code <= avail) { + if (first) { + return stbi__errpuc("no clear code", "Corrupt GIF"); + } + + if (oldcode >= 0) { + p = &g->codes[avail++]; + if (avail > 8192) { + return stbi__errpuc("too many codes", "Corrupt GIF"); + } + + p->prefix = (stbi__int16) oldcode; + p->first = g->codes[oldcode].first; + p->suffix = (code == avail) ? p->first : g->codes[code].first; + } else if (code == avail) + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + + stbi__out_gif_code(g, (stbi__uint16) code); + + if ((avail & codemask) == 0 && avail <= 0x0FFF) { + codesize++; + codemask = (1 << codesize) - 1; + } + + oldcode = code; + } else { + return stbi__errpuc("illegal code in raster", "Corrupt GIF"); + } + } + } +} + +// this function is designed to support animated gifs, although stb_image doesn't support it +// two back is the image from two frames ago, used for a very specific disposal format +static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back) +{ + int dispose; + int first_frame; + int pi; + int pcount; + STBI_NOTUSED(req_comp); + + // on first frame, any non-written pixels get the background colour (non-transparent) + first_frame = 0; + if (g->out == 0) { + if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header + if (!stbi__mad3sizes_valid(4, g->w, g->h, 0)) + return stbi__errpuc("too large", "GIF image is too large"); + pcount = g->w * g->h; + g->out = (stbi_uc *) stbi__malloc(4 * pcount); + g->background = (stbi_uc *) stbi__malloc(4 * pcount); + g->history = (stbi_uc *) stbi__malloc(pcount); + if (!g->out || !g->background || !g->history) + return stbi__errpuc("outofmem", "Out of memory"); + + // image is treated as "transparent" at the start - ie, nothing overwrites the current background; + // background colour is only used for pixels that are not rendered first frame, after that "background" + // color refers to the color that was there the previous frame. 
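+      // For example, a pixel never drawn by the first frame stays transparent
+      // here, and is only filled with the background colour at the end of the
+      // first Image Descriptor (see the bgindex fix-up further below).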
+ memset(g->out, 0x00, 4 * pcount); + memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent) + memset(g->history, 0x00, pcount); // pixels that were affected previous frame + first_frame = 1; + } else { + // second frame - how do we dispose of the previous one? + dispose = (g->eflags & 0x1C) >> 2; + pcount = g->w * g->h; + + if ((dispose == 3) && (two_back == 0)) { + dispose = 2; // if I don't have an image to revert back to, default to the old background + } + + if (dispose == 3) { // use previous graphic + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 ); + } + } + } else if (dispose == 2) { + // restore what was changed last frame to background before that frame; + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 ); + } + } + } else { + // This is a non-disposal case eithe way, so just + // leave the pixels as is, and they will become the new background + // 1: do not dispose + // 0: not specified. + } + + // background is what out is after the undoing of the previou frame; + memcpy( g->background, g->out, 4 * g->w * g->h ); + } + + // clear my history; + memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame + + for (;;) { + int tag = stbi__get8(s); + switch (tag) { + case 0x2C: /* Image Descriptor */ + { + stbi__int32 x, y, w, h; + stbi_uc *o; + + x = stbi__get16le(s); + y = stbi__get16le(s); + w = stbi__get16le(s); + h = stbi__get16le(s); + if (((x + w) > (g->w)) || ((y + h) > (g->h))) + return stbi__errpuc("bad Image Descriptor", "Corrupt GIF"); + + g->line_size = g->w * 4; + g->start_x = x * 4; + g->start_y = y * g->line_size; + g->max_x = g->start_x + w * 4; + g->max_y = g->start_y + h * g->line_size; + g->cur_x = g->start_x; + g->cur_y = g->start_y; + + // if the width of the specified rectangle is 0, that means + // we may not see *any* pixels or the image is malformed; + // to make sure this is caught, move the current y down to + // max_y (which is what out_gif_code checks). + if (w == 0) + g->cur_y = g->max_y; + + g->lflags = stbi__get8(s); + + if (g->lflags & 0x40) { + g->step = 8 * g->line_size; // first interlaced spacing + g->parse = 3; + } else { + g->step = g->line_size; + g->parse = 0; + } + + if (g->lflags & 0x80) { + stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1); + g->color_table = (stbi_uc *) g->lpal; + } else if (g->flags & 0x80) { + g->color_table = (stbi_uc *) g->pal; + } else + return stbi__errpuc("missing color table", "Corrupt GIF"); + + o = stbi__process_gif_raster(s, g); + if (!o) return NULL; + + // if this was the first frame, + pcount = g->w * g->h; + if (first_frame && (g->bgindex > 0)) { + // if first frame, any pixel not drawn to gets the background color + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi] == 0) { + g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be; + memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 ); + } + } + } + + return o; + } + + case 0x21: // Comment Extension. + { + int len; + int ext = stbi__get8(s); + if (ext == 0xF9) { // Graphic Control Extension. + len = stbi__get8(s); + if (len == 4) { + g->eflags = stbi__get8(s); + g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths. 
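+            // For example, a stored value of 10 (bytes 0x0A 0x00,
+            // little-endian) becomes g->delay = 100 milliseconds.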
+ + // unset old transparent + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 255; + } + if (g->eflags & 0x01) { + g->transparent = stbi__get8(s); + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 0; + } + } else { + // don't need transparent + stbi__skip(s, 1); + g->transparent = -1; + } + } else { + stbi__skip(s, len); + break; + } + } + while ((len = stbi__get8(s)) != 0) { + stbi__skip(s, len); + } + break; + } + + case 0x3B: // gif stream termination code + return (stbi_uc *) s; // using '1' causes warning on some compilers + + default: + return stbi__errpuc("unknown code", "Corrupt GIF"); + } + } +} + +static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays) +{ + STBI_FREE(g->out); + STBI_FREE(g->history); + STBI_FREE(g->background); + + if (out) STBI_FREE(out); + if (delays && *delays) STBI_FREE(*delays); + return stbi__errpuc("outofmem", "Out of memory"); +} + +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + if (stbi__gif_test(s)) { + int layers = 0; + stbi_uc *u = 0; + stbi_uc *out = 0; + stbi_uc *two_back = 0; + stbi__gif g; + int stride; + int out_size = 0; + int delays_size = 0; + + STBI_NOTUSED(out_size); + STBI_NOTUSED(delays_size); + + memset(&g, 0, sizeof(g)); + if (delays) { + *delays = 0; + } + + do { + u = stbi__gif_load_next(s, &g, comp, req_comp, two_back); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + + if (u) { + *x = g.w; + *y = g.h; + ++layers; + stride = g.w * g.h * 4; + + if (out) { + void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride ); + if (!tmp) + return stbi__load_gif_main_outofmem(&g, out, delays); + else { + out = (stbi_uc*) tmp; + out_size = layers * stride; + } + + if (delays) { + int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers ); + if (!new_delays) + return stbi__load_gif_main_outofmem(&g, out, delays); + *delays = new_delays; + delays_size = layers * sizeof(int); + } + } else { + out = (stbi_uc*)stbi__malloc( layers * stride ); + if (!out) + return stbi__load_gif_main_outofmem(&g, out, delays); + out_size = layers * stride; + if (delays) { + *delays = (int*) stbi__malloc( layers * sizeof(int) ); + if (!*delays) + return stbi__load_gif_main_outofmem(&g, out, delays); + delays_size = layers * sizeof(int); + } + } + memcpy( out + ((layers - 1) * stride), u, stride ); + if (layers >= 2) { + two_back = out - 2 * stride; + } + + if (delays) { + (*delays)[layers - 1U] = g.delay; + } + } + } while (u != 0); + + // free temp buffer; + STBI_FREE(g.out); + STBI_FREE(g.history); + STBI_FREE(g.background); + + // do the final conversion after loading everything; + if (req_comp && req_comp != 4) + out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h); + + *z = layers; + return out; + } else { + return stbi__errpuc("not GIF", "Image was not as a gif type."); + } +} + +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *u = 0; + stbi__gif g; + memset(&g, 0, sizeof(g)); + STBI_NOTUSED(ri); + + u = stbi__gif_load_next(s, &g, comp, req_comp, 0); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + if (u) { + *x = g.w; + *y = g.h; + + // moved conversion to after successful load so that the same + // can be done for multiple frames. 
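+      // For example, a caller requesting req_comp == 3 gets the 4-channel
+      // RGBA decode collapsed to RGB here, dropping the alpha channel.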
+ if (req_comp && req_comp != 4) + u = stbi__convert_format(u, 4, req_comp, g.w, g.h); + } else if (g.out) { + // if there was an error and we allocated an image buffer, free it! + STBI_FREE(g.out); + } + + // free buffers needed for multiple frame loading; + STBI_FREE(g.history); + STBI_FREE(g.background); + + return u; +} + +static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp) +{ + return stbi__gif_info_raw(s,x,y,comp); +} +#endif + +// ************************************************************************************************* +// Radiance RGBE HDR loader +// originally by Nicolas Schulz +#ifndef STBI_NO_HDR +static int stbi__hdr_test_core(stbi__context *s, const char *signature) +{ + int i; + for (i=0; signature[i]; ++i) + if (stbi__get8(s) != signature[i]) + return 0; + stbi__rewind(s); + return 1; +} + +static int stbi__hdr_test(stbi__context* s) +{ + int r = stbi__hdr_test_core(s, "#?RADIANCE\n"); + stbi__rewind(s); + if(!r) { + r = stbi__hdr_test_core(s, "#?RGBE\n"); + stbi__rewind(s); + } + return r; +} + +#define STBI__HDR_BUFLEN 1024 +static char *stbi__hdr_gettoken(stbi__context *z, char *buffer) +{ + int len=0; + char c = '\0'; + + c = (char) stbi__get8(z); + + while (!stbi__at_eof(z) && c != '\n') { + buffer[len++] = c; + if (len == STBI__HDR_BUFLEN-1) { + // flush to end of line + while (!stbi__at_eof(z) && stbi__get8(z) != '\n') + ; + break; + } + c = (char) stbi__get8(z); + } + + buffer[len] = 0; + return buffer; +} + +static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp) +{ + if ( input[3] != 0 ) { + float f1; + // Exponent + f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8)); + if (req_comp <= 2) + output[0] = (input[0] + input[1] + input[2]) * f1 / 3; + else { + output[0] = input[0] * f1; + output[1] = input[1] * f1; + output[2] = input[2] * f1; + } + if (req_comp == 2) output[1] = 1; + if (req_comp == 4) output[3] = 1; + } else { + switch (req_comp) { + case 4: output[3] = 1; /* fallthrough */ + case 3: output[0] = output[1] = output[2] = 0; + break; + case 2: output[1] = 1; /* fallthrough */ + case 1: output[0] = 0; + break; + } + } +} + +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + char buffer[STBI__HDR_BUFLEN]; + char *token; + int valid = 0; + int width, height; + stbi_uc *scanline; + float *hdr_data; + int len; + unsigned char count, value; + int i, j, k, c1,c2, z; + const char *headerToken; + STBI_NOTUSED(ri); + + // Check identifier + headerToken = stbi__hdr_gettoken(s,buffer); + if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0) + return stbi__errpf("not HDR", "Corrupt HDR image"); + + // Parse header + for(;;) { + token = stbi__hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format"); + + // Parse width and height + // can't use sscanf() if we're not using stdio! 
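+   // For example, a dimensions line of "-Y 480 +X 640" yields height = 480
+   // and width = 640; only this standard orientation is accepted here.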
+   token = stbi__hdr_gettoken(s,buffer);
+   if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
+   token += 3;
+   height = (int) strtol(token, &token, 10);
+   while (*token == ' ') ++token;
+   if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
+   token += 3;
+   width = (int) strtol(token, NULL, 10);
+
+   if (height > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
+   if (width > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
+
+   *x = width;
+   *y = height;
+
+   if (comp) *comp = 3;
+   if (req_comp == 0) req_comp = 3;
+
+   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
+      return stbi__errpf("too large", "HDR image is too large");
+
+   // Read data
+   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
+   if (!hdr_data)
+      return stbi__errpf("outofmem", "Out of memory");
+
+   // Load image data
+   // image data is stored as some number of scan lines
+   if ( width < 8 || width >= 32768) {
+      // Read flat data
+      for (j=0; j < height; ++j) {
+         for (i=0; i < width; ++i) {
+            stbi_uc rgbe[4];
+           main_decode_loop:
+            stbi__getn(s, rgbe, 4);
+            stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
+         }
+      }
+   } else {
+      // Read RLE-encoded data
+      scanline = NULL;
+
+      for (j = 0; j < height; ++j) {
+         c1 = stbi__get8(s);
+         c2 = stbi__get8(s);
+         len = stbi__get8(s);
+         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
+            // not run-length encoded, so we have to actually use THIS data as a decoded
+            // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
+            stbi_uc rgbe[4];
+            rgbe[0] = (stbi_uc) c1;
+            rgbe[1] = (stbi_uc) c2;
+            rgbe[2] = (stbi_uc) len;
+            rgbe[3] = (stbi_uc) stbi__get8(s);
+            stbi__hdr_convert(hdr_data, rgbe, req_comp);
+            i = 1;
+            j = 0;
+            STBI_FREE(scanline);
+            goto main_decode_loop; // yes, this makes no sense
+         }
+         len <<= 8;
+         len |= stbi__get8(s);
+         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
+         if (scanline == NULL) {
+            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
+            if (!scanline) {
+               STBI_FREE(hdr_data);
+               return stbi__errpf("outofmem", "Out of memory");
+            }
+         }
+
+         for (k = 0; k < 4; ++k) {
+            int nleft;
+            i = 0;
+            while ((nleft = width - i) > 0) {
+               count = stbi__get8(s);
+               if (count > 128) {
+                  // Run
+                  value = stbi__get8(s);
+                  count -= 128;
+                  if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
+                  for (z = 0; z < count; ++z)
+                     scanline[i++ * 4 + k] = value;
+               } else {
+                  // Dump
+                  if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
+                  for (z = 0; z < count; ++z)
+                     scanline[i++ * 4 + k] = stbi__get8(s);
+               }
+            }
+         }
+         for (i=0; i < width; ++i)
+            stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
+      }
+      if (scanline)
+         STBI_FREE(scanline);
+   }
+
+   return hdr_data;
+}
+
+static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   char buffer[STBI__HDR_BUFLEN];
+   char *token;
+   int valid = 0;
+   int dummy;
+
+   if (!x) x = &dummy;
+   if (!y) y = &dummy;
+   if (!comp) comp = &dummy;
+
+   if (stbi__hdr_test(s) == 0) {
+       stbi__rewind( s );
+       return 0;
+   }
+
+   for(;;) {
+      token = stbi__hdr_gettoken(s,buffer);
+      if (token[0] == 0) break;
+      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
+   }
+
+ if (!valid) { + stbi__rewind( s ); + return 0; + } + token = stbi__hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *y = (int) strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) { + stbi__rewind( s ); + return 0; + } + token += 3; + *x = (int) strtol(token, NULL, 10); + *comp = 3; + return 1; +} +#endif // STBI_NO_HDR + +#ifndef STBI_NO_BMP +static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) +{ + void *p; + stbi__bmp_data info; + + info.all_a = 255; + p = stbi__bmp_parse_header(s, &info); + if (p == NULL) { + stbi__rewind( s ); + return 0; + } + if (x) *x = s->img_x; + if (y) *y = s->img_y; + if (comp) { + if (info.bpp == 24 && info.ma == 0xff000000) + *comp = 3; + else + *comp = info.ma ? 4 : 3; + } + return 1; +} +#endif + +#ifndef STBI_NO_PSD +static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp) +{ + int channelCount, dummy, depth; + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 0; + } + *y = stbi__get32be(s); + *x = stbi__get32be(s); + depth = stbi__get16be(s); + if (depth != 8 && depth != 16) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 3) { + stbi__rewind( s ); + return 0; + } + *comp = 4; + return 1; +} + +static int stbi__psd_is16(stbi__context *s) +{ + int channelCount, depth; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 0; + } + STBI_NOTUSED(stbi__get32be(s)); + STBI_NOTUSED(stbi__get32be(s)); + depth = stbi__get16be(s); + if (depth != 16) { + stbi__rewind( s ); + return 0; + } + return 1; +} +#endif + +#ifndef STBI_NO_PIC +static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) +{ + int act_comp=0,num_packets=0,chained,dummy; + stbi__pic_packet packets[10]; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) { + stbi__rewind(s); + return 0; + } + + stbi__skip(s, 88); + + *x = stbi__get16be(s); + *y = stbi__get16be(s); + if (stbi__at_eof(s)) { + stbi__rewind( s); + return 0; + } + if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) { + stbi__rewind( s ); + return 0; + } + + stbi__skip(s, 8); + + do { + stbi__pic_packet *packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return 0; + + packet = &packets[num_packets++]; + chained = stbi__get8(s); + packet->size = stbi__get8(s); + packet->type = stbi__get8(s); + packet->channel = stbi__get8(s); + act_comp |= packet->channel; + + if (stbi__at_eof(s)) { + stbi__rewind( s ); + return 0; + } + if (packet->size != 8) { + stbi__rewind( s ); + return 0; + } + } while (chained); + + *comp = (act_comp & 0x10 ? 
4 : 3); + + return 1; +} +#endif + +// ************************************************************************************************* +// Portable Gray Map and Portable Pixel Map loader +// by Ken Miller +// +// PGM: http://netpbm.sourceforge.net/doc/pgm.html +// PPM: http://netpbm.sourceforge.net/doc/ppm.html +// +// Known limitations: +// Does not support comments in the header section +// Does not support ASCII image data (formats P2 and P3) + +#ifndef STBI_NO_PNM + +static int stbi__pnm_test(stbi__context *s) +{ + char p, t; + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind( s ); + return 0; + } + return 1; +} + +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) +{ + stbi_uc *out; + STBI_NOTUSED(ri); + + ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n); + if (ri->bits_per_channel == 0) + return 0; + + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = s->img_n; + + if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0)) + return stbi__errpuc("too large", "PNM too large"); + + out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); + stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8)); + + if (req_comp && req_comp != s->img_n) { + out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // stbi__convert_format frees input on failure + } + return out; +} + +static int stbi__pnm_isspace(char c) +{ + return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; +} + +static void stbi__pnm_skip_whitespace(stbi__context *s, char *c) +{ + for (;;) { + while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) + *c = (char) stbi__get8(s); + + if (stbi__at_eof(s) || *c != '#') + break; + + while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' ) + *c = (char) stbi__get8(s); + } +} + +static int stbi__pnm_isdigit(char c) +{ + return c >= '0' && c <= '9'; +} + +static int stbi__pnm_getinteger(stbi__context *s, char *c) +{ + int value = 0; + + while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) { + value = value*10 + (*c - '0'); + *c = (char) stbi__get8(s); + } + + return value; +} + +static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) +{ + int maxv, dummy; + char c, p, t; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + stbi__rewind(s); + + // Get identifier + p = (char) stbi__get8(s); + t = (char) stbi__get8(s); + if (p != 'P' || (t != '5' && t != '6')) { + stbi__rewind(s); + return 0; + } + + *comp = (t == '6') ? 
3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm + + c = (char) stbi__get8(s); + stbi__pnm_skip_whitespace(s, &c); + + *x = stbi__pnm_getinteger(s, &c); // read width + stbi__pnm_skip_whitespace(s, &c); + + *y = stbi__pnm_getinteger(s, &c); // read height + stbi__pnm_skip_whitespace(s, &c); + + maxv = stbi__pnm_getinteger(s, &c); // read max value + if (maxv > 65535) + return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images"); + else if (maxv > 255) + return 16; + else + return 8; +} + +static int stbi__pnm_is16(stbi__context *s) +{ + if (stbi__pnm_info(s, NULL, NULL, NULL) == 16) + return 1; + return 0; +} +#endif + +static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) +{ + #ifndef STBI_NO_JPEG + if (stbi__jpeg_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNG + if (stbi__png_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_GIF + if (stbi__gif_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_BMP + if (stbi__bmp_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PIC + if (stbi__pic_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_PNM + if (stbi__pnm_info(s, x, y, comp)) return 1; + #endif + + #ifndef STBI_NO_HDR + if (stbi__hdr_info(s, x, y, comp)) return 1; + #endif + + // test tga last because it's a crappy test! + #ifndef STBI_NO_TGA + if (stbi__tga_info(s, x, y, comp)) + return 1; + #endif + return stbi__err("unknown image type", "Image not of any known type, or corrupt"); +} + +static int stbi__is_16_main(stbi__context *s) +{ + #ifndef STBI_NO_PNG + if (stbi__png_is16(s)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_is16(s)) return 1; + #endif + + #ifndef STBI_NO_PNM + if (stbi__pnm_is16(s)) return 1; + #endif + return 0; +} + +#ifndef STBI_NO_STDIO +STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_info_from_file(f, x, y, comp); + fclose(f); + return result; +} + +STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__info_main(&s,x,y,comp); + fseek(f,pos,SEEK_SET); + return r; +} + +STBIDEF int stbi_is_16_bit(char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_is_16_bit_from_file(f); + fclose(f); + return result; +} + +STBIDEF int stbi_is_16_bit_from_file(FILE *f) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__is_16_main(&s); + fseek(f,pos,SEEK_SET); + return r; +} +#endif // !STBI_NO_STDIO + +STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__info_main(&s,x,y,comp); +} + +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__is_16_main(&s); +} + +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user) +{ + 
stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__is_16_main(&s); +} + +#endif // STB_IMAGE_IMPLEMENTATION + +/* + revision history: + 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug + 1-bit BMP + *_is_16_bit api + avoid warnings + 2.16 (2017-07-23) all functions have 16-bit variants; + STBI_NO_STDIO works again; + compilation fixes; + fix rounding in unpremultiply; + optimize vertical flip; + disable raw_len validation; + documentation fixes + 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; + warning fixes; disable run-time SSE detection on gcc; + uniform handling of optional "return" values; + thread-safe initialization of zlib tables + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now + 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes + 2.11 (2016-04-02) allocate large structures on the stack + remove white matting for transparent PSD + fix reported channel count for PNG & BMP + re-enable SSE2 in non-gcc 64-bit + support RGB-formatted JPEG + read 16-bit PNGs (only as 8-bit) + 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED + 2.09 (2016-01-16) allow comments in PNM files + 16-bit-per-pixel TGA (not bit-per-component) + info() for TGA could break due to .hdr handling + info() for BMP to shares code instead of sloppy parse + can use STBI_REALLOC_SIZED if allocator doesn't support realloc + code cleanup + 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA + 2.07 (2015-09-13) fix compiler warnings + partial animated GIF support + limited 16-bpc PSD support + #ifdef unused functions + bug with < 92 byte PIC,PNM,HDR,TGA + 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value + 2.05 (2015-04-19) fix bug in progressive JPEG handling, fix warning + 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit + 2.03 (2015-04-12) extra corruption checking (mmozeiko) + stbi_set_flip_vertically_on_load (nguillemot) + fix NEON support; fix mingw support + 2.02 (2015-01-19) fix incorrect assert, fix warning + 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2 + 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG + 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) + progressive JPEG (stb) + PGM/PPM support (Ken Miller) + STBI_MALLOC,STBI_REALLOC,STBI_FREE + GIF bugfix -- seemingly never worked + STBI_NO_*, STBI_ONLY_* + 1.48 (2014-12-14) fix incorrectly-named assert() + 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb) + optimize PNG (ryg) + fix bug in interlaced PNG with user-specified channel count (stb) + 1.46 (2014-08-26) + fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG + 1.45 (2014-08-16) + fix MSVC-ARM internal compiler error by wrapping malloc + 1.44 (2014-08-07) + various warning fixes from Ronny Chevalier + 1.43 (2014-07-15) + fix MSVC-only compiler problem in code changed in 1.42 + 1.42 (2014-07-09) + don't define _CRT_SECURE_NO_WARNINGS (affects user code) + fixes to stbi__cleanup_jpeg path + added STBI_ASSERT to avoid requiring assert.h + 1.41 (2014-06-25) + fix search&replace from 1.36 that messed up comments/error messages + 1.40 (2014-06-22) + fix gcc struct-initialization warning + 1.39 (2014-06-15) 
+      fix to TGA optimization when req_comp != number of components in TGA;
+      fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
+      add support for BMP version 5 (more ignored fields)
+      1.38  (2014-06-06)
+              suppress MSVC warnings on integer casts truncating values
+              fix accidental rename of 'skip' field of I/O
+      1.37  (2014-06-04)
+              remove duplicate typedef
+      1.36  (2014-06-03)
+              convert to header file single-file library
+              if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
+      1.35  (2014-05-27)
+              various warnings
+              fix broken STBI_SIMD path
+              fix bug where stbi_load_from_file no longer left file pointer in correct place
+              fix broken non-easy path for 32-bit BMP (possibly never used)
+              TGA optimization by Arseny Kapoulkine
+      1.34  (unknown)
+              use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
+      1.33  (2011-07-14)
+              make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
+      1.32  (2011-07-13)
+              support for "info" function for all supported filetypes (SpartanJ)
+      1.31  (2011-06-20)
+              a few more leak fixes, bug in PNG handling (SpartanJ)
+      1.30  (2011-06-11)
+              added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
+              removed deprecated format-specific test/load functions
+              removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
+              error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
+              fix inefficiency in decoding 32-bit BMP (David Woo)
+      1.29  (2010-08-16)
+              various warning fixes from Aurelien Pocheville
+      1.28  (2010-08-01)
+              fix bug in GIF palette transparency (SpartanJ)
+      1.27  (2010-08-01)
+              cast-to-stbi_uc to fix warnings
+      1.26  (2010-07-24)
+              fix bug in file buffering for PNG reported by SpartanJ
+      1.25  (2010-07-17)
+              refix trans_data warning (Won Chun)
+      1.24  (2010-07-12)
+              perf improvements reading from files on platforms with lock-heavy fgetc()
+              minor perf improvements for jpeg
+              deprecated type-specific functions so we'll get feedback if they're needed
+              attempt to fix trans_data warning (Won Chun)
+      1.23    fixed bug in iPhone support
+      1.22  (2010-07-10)
+              removed image *writing* support
+              stbi_info support from Jetro Lauha
+              GIF support from Jean-Marc Lienher
+              iPhone PNG-extensions from James Brown
+              warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. Janez Žemva)
+      1.21    fix use of 'stbi_uc' in header (reported by jon blow)
+      1.20    added support for Softimage PIC, by Tom Seddon
+      1.19    bug in interlaced PNG corruption check (found by ryg)
+      1.18  (2008-08-02)
+              fix a threading bug (local mutable static)
+      1.17    support interlaced PNG
+      1.16    major bugfix - stbi__convert_format converted one too many pixels
+      1.15    initialize some fields for thread safety
+      1.14    fix threadsafe conversion bug
+              header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
+      1.13    threadsafe
+      1.12    const qualifiers in the API
+      1.11    Support installable IDCT, colorspace conversion routines
+      1.10    Fixes for 64-bit (don't use "unsigned long")
+              optimized upsampling by Fabian "ryg" Giesen
+      1.09    Fix format-conversion for PSD code (bad global variables!)
+ 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz + 1.07 attempt to fix C++ warning/errors again + 1.06 attempt to fix C++ warning/errors again + 1.05 fix TGA loading to return correct *comp and use good luminance calc + 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free + 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR + 1.02 support for (subset of) HDR files, float interface for preferred access to them + 1.01 fix bug: possible bug in handling right-side up bmps... not sure + fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all + 1.00 interface to zlib that skips zlib header + 0.99 correct handling of alpha in palette + 0.98 TGA loader by lonesock; dynamically add loaders (untested) + 0.97 jpeg errors on too large a file; also catch another malloc failure + 0.96 fix detection of invalid v value - particleman@mollyrocket forum + 0.95 during header scan, seek to markers in case of padding + 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same + 0.93 handle jpegtran output; verbose errors + 0.92 read 4,8,16,24,32-bit BMP files of several formats + 0.91 output 24-bit Windows 3.0 BMP files + 0.90 fix a few more warnings; bump version number to approach 1.0 + 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd + 0.60 fix compiling as c++ + 0.59 fix warnings: merge Dave Moore's -Wall fixes + 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian + 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available + 0.56 fix bug: zlib uncompressed mode len vs. nlen + 0.55 fix bug: restart_interval not initialized to 0 + 0.54 allow NULL for 'int *comp' + 0.53 fix bug in png 3->4; speedup png decoding + 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments + 0.51 obey req_comp requests, 1-component jpegs return as 1-component, + on 'test' only check type, not whether we support this variant + 0.50 (2006-11-19) + first released version +*/ + + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. 
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/nvblox/include/nvblox/datasets/image_loader.h b/nvblox/include/nvblox/datasets/image_loader.h
new file mode 100644
index 00000000..078ccba4
--- /dev/null
+++ b/nvblox/include/nvblox/datasets/image_loader.h
@@ -0,0 +1,86 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <deque>
+#include <functional>
+#include <future>
+#include <string>
+
+#include "nvblox/core/image.h"
+#include "nvblox/core/types.h"
+
+namespace nvblox {
+namespace datasets {
+
+// Loads a depth image encoded as a 16-bit PNG, assumed to express the depth in
+// mm, and converts it to a float with meters.
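+// (For example, with millimeter encoding a stored 16-bit value of 1500
+// becomes a float depth of 1.5 in the returned DepthImage.)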
+bool load16BitDepthImage(const std::string& filename,
+                         DepthImage* depth_frame_ptr,
+                         MemoryType memory_type = kDefaultImageMemoryType);
+bool load8BitColorImage(const std::string& filename,
+                        ColorImage* color_image_ptr,
+                        MemoryType memory_type = kDefaultImageMemoryType);
+
+using IndexToFilepathFunction = std::function<std::string(int)>;
+
+// Single-threaded image loader
+template <typename ImageType>
+class ImageLoader {
+ public:
+  ImageLoader(IndexToFilepathFunction index_to_filepath,
+              MemoryType memory_type = kDefaultImageMemoryType)
+      : index_to_filepath_(index_to_filepath), memory_type_(memory_type) {}
+  virtual ~ImageLoader() {}
+
+  virtual bool getNextImage(ImageType* image_ptr);
+
+ protected:
+  bool getImage(int image_idx, ImageType*);
+
+  int image_idx_ = 0;
+  IndexToFilepathFunction index_to_filepath_;
+  MemoryType memory_type_;
+};
+
+// Result of an image load: a success flag plus the image itself.
+template <typename ImageType>
+using ImageOptional = std::pair<bool, ImageType>;
+
+// Multi-threaded image loader
+template <typename ImageType>
+class MultiThreadedImageLoader : public ImageLoader<ImageType> {
+ public:
+  MultiThreadedImageLoader(IndexToFilepathFunction index_to_filepath,
+                           int num_threads,
+                           MemoryType memory_type = kDefaultImageMemoryType);
+  ~MultiThreadedImageLoader();
+
+  bool getNextImage(ImageType* image_ptr) override;
+
+ protected:
+  void initLoadQueue();
+  void emptyLoadQueue();
+  void addNextImageToQueue();
+  ImageOptional<ImageType> getImageAsOptional(int image_idx);
+
+  const int num_threads_;
+  std::deque<std::future<ImageOptional<ImageType>>> load_queue_;
+};
+
+}  // namespace datasets
+}  // namespace nvblox
+
+#include "nvblox/datasets/impl/image_loader_impl.h"
diff --git a/nvblox/include/nvblox/datasets/impl/image_loader_impl.h b/nvblox/include/nvblox/datasets/impl/image_loader_impl.h
new file mode 100644
index 00000000..e8794cd1
--- /dev/null
+++ b/nvblox/include/nvblox/datasets/impl/image_loader_impl.h
@@ -0,0 +1,88 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+namespace nvblox {
+namespace datasets {
+
+template <typename ImageType>
+bool ImageLoader<ImageType>::getNextImage(ImageType* image_ptr) {
+  bool res = getImage(image_idx_, image_ptr);
+  ++image_idx_;
+  return res;
+}
+
+template <typename ImageType>
+MultiThreadedImageLoader<ImageType>::MultiThreadedImageLoader(
+    IndexToFilepathFunction index_to_filepath, int num_threads,
+    MemoryType memory_type)
+    : ImageLoader<ImageType>(index_to_filepath, memory_type),
+      num_threads_(num_threads) {
+  initLoadQueue();
+}
+
+template <typename ImageType>
+MultiThreadedImageLoader<ImageType>::~MultiThreadedImageLoader() {
+  emptyLoadQueue();
+}
+
+template <typename ImageType>
+bool MultiThreadedImageLoader<ImageType>::getNextImage(ImageType* image_ptr) {
+  CHECK_NOTNULL(image_ptr);
+  auto images_optional_future = std::move(load_queue_.front());
+  ImageOptional<ImageType> image_optional =
+      std::move(images_optional_future.get());
+  if (image_optional.first) {
+    *image_ptr = std::move(image_optional.second);
+  }
+  load_queue_.pop_front();
+  addNextImageToQueue();
+  return image_optional.first;
+}
+
+template <typename ImageType>
+void MultiThreadedImageLoader<ImageType>::initLoadQueue() {
+  for (int i = 0; i < num_threads_; i++) {
+    addNextImageToQueue();
+  }
+}
+
+template <typename ImageType>
+void MultiThreadedImageLoader<ImageType>::emptyLoadQueue() {
+  while (!load_queue_.empty()) {
+    load_queue_.pop_front();
+  }
+}
+
+template <typename ImageType>
+void MultiThreadedImageLoader<ImageType>::addNextImageToQueue() {
+  load_queue_.push_back(
+      std::async(std::launch::async,
+                 &MultiThreadedImageLoader<ImageType>::getImageAsOptional, this,
+                 this->image_idx_));
+  ++this->image_idx_;
+}
+
+template <typename ImageType>
+ImageOptional<ImageType>
+MultiThreadedImageLoader<ImageType>::getImageAsOptional(int image_idx) {
+  ImageType image;
+  bool success_flag = ImageLoader<ImageType>::getImage(image_idx, &image);
+  return {success_flag, std::move(image)};
+}
+
+}  // namespace datasets
+}  // namespace nvblox
\ No newline at end of file
diff --git a/nvblox/include/nvblox/datasets/parse_3dmatch.h b/nvblox/include/nvblox/datasets/parse_3dmatch.h
new file mode 100644
index 00000000..774dfdaf
--- /dev/null
+++ b/nvblox/include/nvblox/datasets/parse_3dmatch.h
@@ -0,0 +1,63 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "nvblox/core/types.h"
+#include "nvblox/datasets/image_loader.h"
+
+namespace nvblox {
+namespace datasets {
+namespace threedmatch {
+
+/// Parses a 4x4 matrix from a text file in 3D Match format: space-separated.
+bool parsePoseFromFile(const std::string& filename, Transform* transform);
+
+/// Parse 3x3 camera intrinsics matrix from 3D Match format: space-separated.
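+/// (For example, a camera-intrinsics file in this format holds nine
+/// whitespace-separated numbers, read row-major into the 3x3 matrix.)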
+bool parseCameraFromFile(const std::string& filename,
+                         Eigen::Matrix3f* intrinsics);
+
+std::string getPathForCameraIntrinsics(const std::string& base_path);
+std::string getPathForFramePose(const std::string& base_path, const int seq_id,
+                                const int frame_id);
+std::string getPathForDepthImage(const std::string& base_path, const int seq_id,
+                                 const int frame_id);
+std::string getPathForColorImage(const std::string& base_path, const int seq_id,
+                                 const int frame_id);
+
+// Factory functions for single-threaded 3DMatch image loaders
+std::unique_ptr<ImageLoader<DepthImage>>
+createDepthImageLoader(const std::string& base_path, const int seq_id,
+                       MemoryType memory_type = kDefaultImageMemoryType);
+std::unique_ptr<ImageLoader<ColorImage>>
+createColorImageLoader(const std::string& base_path, const int seq_id,
+                       MemoryType memory_type = kDefaultImageMemoryType);
+
+// Factory functions for multi-threaded 3DMatch image loaders
+std::unique_ptr<MultiThreadedImageLoader<DepthImage>>
+createMultithreadedDepthImageLoader(
+    const std::string& base_path, const int seq_id, const int num_threads,
+    MemoryType memory_type = kDefaultImageMemoryType);
+std::unique_ptr<MultiThreadedImageLoader<ColorImage>>
+createMultithreadedColorImageLoader(
+    const std::string& base_path, const int seq_id, const int num_threads,
+    MemoryType memory_type = kDefaultImageMemoryType);
+
+}  // namespace threedmatch
+}  // namespace datasets
+}  // namespace nvblox
diff --git a/nvblox/include/nvblox/gpu_hash/cuda/gpu_hash_interface.cuh b/nvblox/include/nvblox/gpu_hash/cuda/gpu_hash_interface.cuh
new file mode 100644
index 00000000..7df672fa
--- /dev/null
+++ b/nvblox/include/nvblox/gpu_hash/cuda/gpu_hash_interface.cuh
@@ -0,0 +1,58 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include <thrust/pair.h>
+
+#include <stdgpu/cstddef.h>
+#include <stdgpu/memory.h>
+#include <stdgpu/unordered_map.cuh>
+
+#include <glog/logging.h>
+
+#include "nvblox/core/hash.h"
+#include "nvblox/core/types.h"
+
+namespace nvblox {
+
+template <typename BlockType>
+using IndexBlockPair = thrust::pair<Index3D, BlockType*>;
+
+template <typename BlockType>
+using ConstIndexBlockPair = thrust::pair<Index3D, const BlockType*>;
+
+template <typename BlockType>
+using Index3DDeviceHashMapType =
+    stdgpu::unordered_map<Index3D, BlockType*, Index3DHash,
+                          std::equal_to<Index3D>>;
+
+template <typename BlockType>
+class GPUHashImpl {
+ public:
+  GPUHashImpl() = default;
+  GPUHashImpl(int max_num_blocks);
+  ~GPUHashImpl();
+
+  size_t size() const { return static_cast<size_t>(max_num_blocks_); }
+
+  stdgpu::index_t max_num_blocks_;
+  Index3DDeviceHashMapType<BlockType> impl_;
+};
+
+}  // namespace nvblox
+
+#include "nvblox/gpu_hash/cuda/impl/gpu_hash_interface_impl.cuh"
diff --git a/nvblox/include/nvblox/gpu_hash/cuda/gpu_indexing.cuh b/nvblox/include/nvblox/gpu_hash/cuda/gpu_indexing.cuh
new file mode 100644
index 00000000..5eacd1b7
--- /dev/null
+++ b/nvblox/include/nvblox/gpu_hash/cuda/gpu_indexing.cuh
@@ -0,0 +1,46 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include + +#include "nvblox/core/blox.h" +#include "nvblox/core/indexing.h" +#include "nvblox/core/types.h" +#include "nvblox/core/voxels.h" + +#include "nvblox/gpu_hash/cuda/gpu_hash_interface.cuh" + +namespace nvblox { + +template +__device__ inline bool getVoxelAtPosition( + Index3DDeviceHashMapType> block_hash, + const Vector3f& p_L, float block_size, VoxelType** voxel_ptr) { + Index3D block_idx; + Index3D voxel_idx; + getBlockAndVoxelIndexFromPositionInLayer(block_size, p_L, &block_idx, + &voxel_idx); + auto it = block_hash.find(block_idx); + if (it != block_hash.end()) { + *voxel_ptr = + &it->second->voxels[voxel_idx.x()][voxel_idx.y()][voxel_idx.z()]; + return true; + } + return false; +} + +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/include/nvblox/gpu_hash/cuda/gpu_set.cuh b/nvblox/include/nvblox/gpu_hash/cuda/gpu_set.cuh new file mode 100644 index 00000000..858427b5 --- /dev/null +++ b/nvblox/include/nvblox/gpu_hash/cuda/gpu_set.cuh @@ -0,0 +1,41 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include +#include +#include + +#include "nvblox/core/hash.h" +#include "nvblox/core/types.h" + +namespace nvblox { + +typedef class stdgpu::unordered_set> + Index3DDeviceSet_t; + +struct Index3DDeviceSet { + Index3DDeviceSet(size_t size); + ~Index3DDeviceSet(); + + Index3DDeviceSet_t set; +}; + +// Copies the contents of a Index3DDeviceSet to an std::vector. +void copySetToVector(const Index3DDeviceSet_t& set, std::vector* vec); + +} // namespace nvblox diff --git a/nvblox/include/nvblox/gpu_hash/cuda/impl/gpu_hash_interface_impl.cuh b/nvblox/include/nvblox/gpu_hash/cuda/impl/gpu_hash_interface_impl.cuh new file mode 100644 index 00000000..a0012df3 --- /dev/null +++ b/nvblox/include/nvblox/gpu_hash/cuda/impl/gpu_hash_interface_impl.cuh @@ -0,0 +1,37 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+#pragma once
+
+namespace nvblox {
+
+template <typename BlockType>
+GPUHashImpl<BlockType>::GPUHashImpl(int max_num_blocks)
+    : max_num_blocks_(max_num_blocks),
+      impl_(Index3DDeviceHashMapType<BlockType>::createDeviceObject(
+          max_num_blocks_)) {
+  VLOG(3) << "Creating a GPUHashImpl with max capacity of " << max_num_blocks_
+          << " blocks";
+}
+
+template <typename BlockType>
+GPUHashImpl<BlockType>::~GPUHashImpl() {
+  if (impl_.size() > 0) {
+    Index3DDeviceHashMapType<BlockType>::destroyDeviceObject(impl_);
+    VLOG(3) << "Destroying a GPUHashImpl";
+  }
+}
+
+}  // namespace nvblox
diff --git a/nvblox/include/nvblox/gpu_hash/cuda/impl/gpu_layer_view_impl.cuh b/nvblox/include/nvblox/gpu_hash/cuda/impl/gpu_layer_view_impl.cuh
new file mode 100644
index 00000000..f4b9bf9b
--- /dev/null
+++ b/nvblox/include/nvblox/gpu_hash/cuda/impl/gpu_layer_view_impl.cuh
@@ -0,0 +1,132 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#pragma once
+
+#include "nvblox/gpu_hash/cuda/gpu_hash_interface.cuh"
+#include "nvblox/gpu_hash/gpu_layer_view.h"
+#include "nvblox/utils/timing.h"
+
+namespace nvblox {
+
+template <typename BlockType>
+GPULayerView<BlockType>::GPULayerView(LayerType* layer_ptr)
+    : gpu_hash_ptr_(std::make_shared<GPUHashImpl<BlockType>>()) {
+  reset(layer_ptr);
+}
+
+template <typename BlockType>
+GPULayerView<BlockType>::GPULayerView(size_t max_num_blocks)
+    : gpu_hash_ptr_(std::make_shared<GPUHashImpl<BlockType>>(max_num_blocks)) {
+  // The GPUHashImpl takes care of allocating GPU memory.
+}
+
+template <typename BlockType>
+GPULayerView<BlockType>::~GPULayerView() {
+  // The GPUHashImpl takes care of cleaning up GPU memory.
+}
+
+template <typename BlockType>
+GPULayerView<BlockType>::GPULayerView(const GPULayerView<BlockType>& other)
+    : block_size_(other.block_size_), gpu_hash_ptr_(other.gpu_hash_ptr_) {}
+
+template <typename BlockType>
+GPULayerView<BlockType>::GPULayerView(GPULayerView<BlockType>&& other)
+    : block_size_(other.block_size_),
+      gpu_hash_ptr_(std::move(other.gpu_hash_ptr_)) {}
+
+template <typename BlockType>
+GPULayerView<BlockType>& GPULayerView<BlockType>::operator=(
+    const GPULayerView<BlockType>& other) {
+  block_size_ = other.block_size_;
+  gpu_hash_ptr_ = other.gpu_hash_ptr_;
+  return *this;
+}
+
+template <typename BlockType>
+GPULayerView<BlockType>& GPULayerView<BlockType>::operator=(
+    GPULayerView<BlockType>&& other) {
+  block_size_ = other.block_size_;
+  gpu_hash_ptr_ = std::move(other.gpu_hash_ptr_);
+  return *this;
+}
+
+template <typename BlockType>
+void GPULayerView<BlockType>::reset(LayerType* layer_ptr) {
+  CHECK_NOTNULL(layer_ptr);
+
+  // Allocate gpu hash if not allocated already
+  if (!gpu_hash_ptr_) {
+    gpu_hash_ptr_ = std::make_shared<GPUHashImpl<BlockType>>(
+        layer_ptr->numAllocatedBlocks() * size_expansion_factor_);
+  }
+
+  // Check the load factor and increase the size if required.
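+  // (A worked example of this rule, under the defaults declared in
+  // gpu_layer_view.h (max_load_factor_ = 0.75, size_expansion_factor_ = 2):
+  // a hash sized for 1000 blocks is regrown once the layer holds more than
+  // 750 allocated blocks, and the new capacity becomes
+  // ceil(2 * max(old_capacity, num_allocated_blocks)), i.e. at least 2000.)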
+ const float current_load_factor = + static_cast(layer_ptr->numAllocatedBlocks()) / + static_cast(gpu_hash_ptr_->max_num_blocks_); + if (current_load_factor > max_load_factor_) { + const size_t new_max_num_blocks = std::ceil( + size_expansion_factor_ * std::max(gpu_hash_ptr_->max_num_blocks_, + layer_ptr->numAllocatedBlocks())); + reset(new_max_num_blocks); + CHECK_LT(layer_ptr->numAllocatedBlocks(), gpu_hash_ptr_->max_num_blocks_); + } + + timing::Timer timer("gpu_hash/transfer"); + + gpu_hash_ptr_->impl_.clear(); + + block_size_ = layer_ptr->block_size(); + + // Arange blocks in continuous host memory for transfer + thrust::host_vector> host_block_vector; + host_block_vector.reserve(layer_ptr->numAllocatedBlocks()); + for (const auto& index : layer_ptr->getAllBlockIndices()) { + host_block_vector.push_back(IndexBlockPair( + index, layer_ptr->getBlockAtIndex(index).get())); + } + + // CPU -> GPU + thrust::device_vector> device_block_vector( + host_block_vector); + + // GPU Insert + // NOTE(alexmillane): We have to do some unfortunate casting here. The problem + // is that the hash stores pairs with const keys, but the IndexBlockPair + // vector CANNOT have const keys. So the only way is to perform this cast. + ConstIndexBlockPair* block_start_raw_ptr = + reinterpret_cast*>( + device_block_vector.data().get()); + gpu_hash_ptr_->impl_.insert( + stdgpu::make_device(block_start_raw_ptr), + stdgpu::make_device(block_start_raw_ptr + device_block_vector.size())); + + checkCudaErrors(cudaDeviceSynchronize()); + checkCudaErrors(cudaPeekAtLastError()); +} + +template +void GPULayerView::reset(size_t new_max_num_blocks) { + timing::Timer timer("gpu_hash/reallocation"); + gpu_hash_ptr_ = std::make_shared>(new_max_num_blocks); +} + +template +size_t GPULayerView::size() const { + return gpu_hash_ptr_->size(); +} + +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/include/nvblox/gpu_hash/gpu_layer_view.h b/nvblox/include/nvblox/gpu_hash/gpu_layer_view.h new file mode 100644 index 00000000..f1e5dc71 --- /dev/null +++ b/nvblox/include/nvblox/gpu_hash/gpu_layer_view.h @@ -0,0 +1,84 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+#pragma once
+
+#include <memory>
+
+namespace nvblox {
+
+template <typename BlockType>
+class GPUHashImpl;
+
+template <typename BlockType>
+class BlockLayer;
+
+template <typename BlockType>
+class GPULayerView {
+ public:
+  using LayerType = BlockLayer<BlockType>;
+
+  GPULayerView() = default;
+  GPULayerView(size_t max_num_blocks);
+  GPULayerView(LayerType* layer_ptr);
+
+  GPULayerView(const GPULayerView& other);
+  GPULayerView(GPULayerView&& other);
+  GPULayerView& operator=(const GPULayerView& other);
+  GPULayerView& operator=(GPULayerView&& other);
+
+  ~GPULayerView();
+
+  // Creates a new GPULayerView from a layer
+  void reset(LayerType* layer_ptr);
+
+  // Resizes the underlying GPU hash as well as deleting its contents
+  void reset(size_t new_max_num_blocks);
+
+  float block_size() const { return block_size_; }
+
+  const GPUHashImpl<BlockType>& getHash() const { return *gpu_hash_ptr_; }
+
+  size_t size() const;
+
+  // Accessors
+  float max_load_factor() const { return max_load_factor_; }
+  float size_expansion_factor() const { return size_expansion_factor_; }
+
+ private:
+  // Layer params
+  float block_size_;
+
+  // The load factor at which we reallocate space.
+  const float max_load_factor_ = 0.75;
+
+  // This is the factor by which we overallocate space
+  const float size_expansion_factor_ = 2.0f;
+
+  // NOTE(alexmillane): To keep GPU code out of the header we use PIMPL to hide
+  // the details of the GPU hash.
+  std::shared_ptr<GPUHashImpl<BlockType>> gpu_hash_ptr_;
+};
+
+}  // namespace nvblox
+
+// NOTE(alexmillane):
+// - I am leaving this here as a reminder to NOT include the implementation.
+// - The implementation file is instead included by .cu files which declare
+// specializations.
+// - The problem is that we don't want the GPULayerView implementation, which
+// contains CUDA calls and stdgpu code, bleeding into layer.h, one of our
+// main interface headers.
+//#include "nvblox/gpu_hash/cuda/impl/gpu_layer_view_impl.cuh"
diff --git a/nvblox/include/nvblox/integrators/cuda/impl/projective_integrators_common_impl.cuh b/nvblox/include/nvblox/integrators/cuda/impl/projective_integrators_common_impl.cuh
new file mode 100644
index 00000000..8565d4f0
--- /dev/null
+++ b/nvblox/include/nvblox/integrators/cuda/impl/projective_integrators_common_impl.cuh
@@ -0,0 +1,49 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ +#pragma once + +namespace nvblox { + +__device__ inline bool projectThreadVoxel( + const Index3D* block_indices_device_ptr, const Camera& camera, + const Transform& T_C_L, const float block_size, Eigen::Vector2f* u_px_ptr, + float* u_depth_ptr) { + // The indices of the voxel this thread will work on + // block_indices_device_ptr[blockIdx.x]: + // - The index of the block we're working on (blockIdx.y/z + // should be zero) + // threadIdx.x/y/z - The indices of the voxel within the block (we + // expect the threadBlockDims == voxelBlockDims) + const Index3D block_idx = block_indices_device_ptr[blockIdx.x]; + const Index3D voxel_idx(threadIdx.z, threadIdx.y, threadIdx.x); + + // Voxel center point + const Vector3f p_voxel_center_L = getCenterPostionFromBlockIndexAndVoxelIndex( + block_size, block_idx, voxel_idx); + // To camera frame + const Vector3f p_voxel_center_C = T_C_L * p_voxel_center_L; + + // Project to image plane + if (!camera.project(p_voxel_center_C, u_px_ptr)) { + return false; + } + + // Depth + *u_depth_ptr = p_voxel_center_C.z(); + return true; +} + +} // namespace nvblox diff --git a/nvblox/include/nvblox/integrators/cuda/projective_integrators_common.cuh b/nvblox/include/nvblox/integrators/cuda/projective_integrators_common.cuh new file mode 100644 index 00000000..f1e8c3ba --- /dev/null +++ b/nvblox/include/nvblox/integrators/cuda/projective_integrators_common.cuh @@ -0,0 +1,32 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include "nvblox/core/camera.h" +#include "nvblox/core/common_names.h" +#include "nvblox/core/image.h" +#include "nvblox/core/types.h" + +namespace nvblox { + +__device__ inline bool projectThreadVoxel( + const Index3D* block_indices_device_ptr, const Camera& camera, + const Transform& T_C_L, const float block_size, Eigen::Vector2f* u_px_ptr, + float* u_depth_ptr); + +} // namespace nvblox + +#include "nvblox/integrators/cuda/impl/projective_integrators_common_impl.cuh" diff --git a/nvblox/include/nvblox/integrators/esdf_integrator.h b/nvblox/include/nvblox/integrators/esdf_integrator.h new file mode 100644 index 00000000..a5b0c348 --- /dev/null +++ b/nvblox/include/nvblox/integrators/esdf_integrator.h @@ -0,0 +1,167 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#pragma once + +#include "nvblox/core/blox.h" +#include "nvblox/core/common_names.h" +#include "nvblox/core/layer.h" +#include "nvblox/core/types.h" +#include "nvblox/core/unified_vector.h" +#include "nvblox/core/voxels.h" + +namespace nvblox { + +class EsdfIntegrator { + public: + // Just takes care of the cuda stream. + ~EsdfIntegrator(); + + // Integrate the entire layer. + void integrateLayer(const TsdfLayer& tsdf_layer, EsdfLayer* esdf_layer); + + // Integrate specific blocks in a layer. + void integrateBlocks(const TsdfLayer& tsdf_layer, + const std::vector& block_indices, + EsdfLayer* esdf_layer); + + // Separate CPU and GPU methods. + void integrateBlocksOnCPU(const TsdfLayer& tsdf_layer, + const std::vector& block_indices, + EsdfLayer* esdf_layer); + void integrateBlocksOnGPU(const TsdfLayer& tsdf_layer, + const std::vector& block_indices, + EsdfLayer* esdf_layer); + void integrateSliceOnGPU(const TsdfLayer& tsdf_layer, + const std::vector& block_indices, + float z_min, float z_max, float z_output, + EsdfLayer* esdf_layer); + + // Setting setters and getting getters + float max_distance_m() const { return max_distance_m_; } + float& max_distance_m() { return max_distance_m_; } + + float min_site_distance_vox() const { return min_site_distance_vox_; } + float& min_site_distance_vox() { return min_site_distance_vox_; } + + float min_weight() const { return min_weight_; } + float& min_weight() { return min_weight_; } + + float max_squared_distance_vox(float voxel_size) const { + return max_distance_m_ * max_distance_m_ / (voxel_size * voxel_size); + } + + protected: + void allocateBlocksOnCPU(const std::vector& block_indices, + EsdfLayer* esdf_layer); + void markAllSitesOnCPU(const TsdfLayer& tsdf_layer, + const std::vector& block_indices, + EsdfLayer* esdf_layer, + std::vector* blocks_with_sites, + std::vector* blocks_to_clear); + void computeEsdfOnCPU(const std::vector& blocks_with_sites, + EsdfLayer* esdf_layer); + + // Internal functions for ESDF computation. + void sweepBlockBandOnCPU(int axis_to_sweep, float voxel_size, + EsdfBlock* esdf_block); + void updateLocalNeighborBandsOnCPU(const Index3D& block_index, + EsdfLayer* esdf_layer, + std::vector* updated_blocks); + void propagateEdgesOnCPU(int axis_to_sweep, float voxel_size, + EsdfBlock* esdf_block); + + // CPU-based incremental. + void clearInvalidOnCPU(const std::vector& blocks_with_sites, + EsdfLayer* esdf_layer, + std::vector* updated_blocks); + void clearNeighborsOnCPU(const Index3D& block_index, EsdfLayer* esdf_layer, + Index3DSet* neighbor_clearing_set); + void clearVoxel(EsdfVoxel* voxel, float max_squared_distance_vox); + + // Test function to just clear everything in the whole map. + void clearAllInvalidOnCPU(EsdfLayer* esdf_layer, + std::vector* updated_blocks); + + // GPU computation functions. + void markAllSitesOnGPU(const TsdfLayer& tsdf_layer, + const std::vector& block_indices, + EsdfLayer* esdf_layer, + std::vector* blocks_with_sites, + std::vector* cleared_blocks); + + // Same as the other function but basically makes the whole operation 2D. + // Considers a min and max z in a bounding box which is compressed down into a + // single layer. 
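
A usage sketch of the public entry points declared above, assuming a populated TsdfLayer and an EsdfLayer of matching voxel size; the distance value and function name are illustrative:

#include "nvblox/integrators/esdf_integrator.h"

void buildEsdf(const nvblox::TsdfLayer& tsdf_layer,
               nvblox::EsdfLayer* esdf_layer) {
  nvblox::EsdfIntegrator integrator;
  // Parameter setters return mutable references; 4 m is illustrative
  // (the default declared above is 10 m).
  integrator.max_distance_m() = 4.0f;
  // Full-layer integration; the incremental path takes explicit indices.
  integrator.integrateLayer(tsdf_layer, esdf_layer);
}
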
+ void markSitesInSliceOnGPU(const TsdfLayer& tsdf_layer, + const std::vector& block_indices, + float min_z, float max_z, float output_z, + EsdfLayer* esdf_layer, + std::vector* output_blocks, + std::vector* cleared_blocks); + + void clearInvalidOnGPU(const std::vector& blocks_to_clear, + EsdfLayer* esdf_layer, + std::vector* updated_blocks); + + void computeEsdfOnGPU(const std::vector& blocks_with_sites, + EsdfLayer* esdf_layer); + + // Internal helpers for GPU computation. + void sweepBlockBandOnGPU(device_vector& block_pointers, + float max_squared_distance_vox); + void updateLocalNeighborBandsOnGPU( + const std::vector& block_indices, + device_vector& block_pointers, float max_squared_distance_vox, + EsdfLayer* esdf_layer, std::vector* updated_blocks, + device_vector* updated_block_pointers); + void createNeighborTable(const std::vector& block_indices, + EsdfLayer* esdf_layer, + std::vector* neighbor_indices, + host_vector* neighbor_pointers, + host_vector* neighbor_table); + void clearBlockNeighbors(std::vector& clear_list, + EsdfLayer* esdf_layer, + std::vector* new_clear_list); + + // Helper function to figure out which axes to sweep first and second. + void getSweepAxes(int axis_to_sweep, int* first_axis, int* second_axis) const; + + /// Maximum distance to compute the ESDF. + float max_distance_m_ = 10.0; + /// Minimum distance to a site + float min_site_distance_vox_ = 1.0; + /// Minimum weight to consider a TSDF voxel observed. + float min_weight_ = 1e-4; + + // State. + cudaStream_t cuda_stream_ = nullptr; + + // Temporary storage variables so we don't have to reallocate as much. + host_vector updated_blocks_host_; + device_vector updated_blocks_device_; + host_vector cleared_blocks_host_; + device_vector cleared_blocks_device_; + host_vector tsdf_pointers_host_; + device_vector tsdf_pointers_device_; + host_vector block_pointers_host_; + device_vector block_pointers_device_; + host_vector neighbor_table_host_; + device_vector neighbor_table_device_; + host_vector neighbor_pointers_host_; + device_vector neighbor_pointers_device_; +}; + +} // namespace nvblox diff --git a/nvblox/include/nvblox/integrators/frustum.h b/nvblox/include/nvblox/integrators/frustum.h new file mode 100644 index 00000000..aa0526d3 --- /dev/null +++ b/nvblox/include/nvblox/integrators/frustum.h @@ -0,0 +1,101 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include + +#include "nvblox/core/camera.h" +#include "nvblox/core/image.h" +#include "nvblox/core/types.h" +#include "nvblox/core/unified_vector.h" + +namespace nvblox { + +// Helper class for frustum calculations. 
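
A sketch of querying visible blocks with the FrustumCalculator declared just below; the pose and camera come from the caller, and the size constants are illustrative stand-ins:

#include <vector>

#include "nvblox/integrators/frustum.h"

std::vector<nvblox::Index3D> blocksInView(const nvblox::Transform& T_L_C,
                                          const nvblox::Camera& camera) {
  const float block_size_m = 0.4f;    // e.g. 8 voxels of 5 cm
  const float max_distance_m = 10.0f;
  // AABB-based static variant; the image-based variants additionally need a
  // depth frame plus truncation/integration distances.
  return nvblox::FrustumCalculator::getBlocksInView(
      T_L_C, camera, block_size_m, max_distance_m);
}
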
+class FrustumCalculator { + public: + FrustumCalculator(); + ~FrustumCalculator(); + + // Gets blocks which fall into the camera view up to: max camera distance + static std::vector getBlocksInView(const Transform& T_L_C, + const Camera& camera, + const float block_size, + const float max_distance); + + // Gets blocks which fall into the camera view up to: max distance in the + // depth frame + static std::vector getBlocksInImageView( + const DepthImage& depth_frame, const Transform& T_L_C, + const Camera& camera, const float block_size, + const float truncation_distance_m, + const float max_integration_distance_m); + + // Performs ray casting to get the blocks in view + // NOTE(helenol) The only non-static member function of this class. + std::vector getBlocksInImageViewCuda( + const DepthImage& depth_frame, const Transform& T_L_C, + const Camera& camera, const float block_size, + const float truncation_distance_m, + const float max_integration_distance_m); + + // Params + unsigned int raycast_subsampling_factor() const; + void raycast_subsampling_factor(unsigned int raycast_subsampling_rate); + + private: + // Raycasts to all corners of all blocks touched by the endpoints of depth + // rays. + void getBlocksByRaycastingCorners( + const Transform& T_L_C, // NOLINT + const Camera& camera, // NOLINT + const DepthImage& depth_frame, // NOLINT + float block_size, // NOLINT + const float truncation_distance_m, // NOLINT + const float max_integration_distance_m, // NOLINT + const Index3D& min_index, // NOLINT + const Index3D& aabb_size, // NOLINT + bool* aabb_updated_cuda); + + // Raycasts through (possibly subsampled) pixels in the image. + void getBlocksByRaycastingPixels( + const Transform& T_L_C, // NOLINT + const Camera& camera, // NOLINT + const DepthImage& depth_frame, // NOLINT + float block_size, // NOLINT + const float truncation_distance_m, // NOLINT + const float max_integration_distance_m, // NOLINT + const Index3D& min_index, // NOLINT + const Index3D& aabb_size, // NOLINT + bool* aabb_updated_cuda); + + // A 3D grid of bools, one for each block in the AABB, which indicates if it + // is in the view. The 3D grid is represented as a flat vector. + device_vector aabb_device_buffer_; + host_vector aabb_host_buffer_; + + // Whether to use the single kernel or double kernel computation for the + // CUDA version. Single kernel raycasts to every pixel, double kernel + // raycasts to every block corner. + bool raycast_to_pixels_ = true; + // The rate at which we subsample pixels to raycast. Note that we always + // raycast the extremes of the frame, no matter the subsample rate. + unsigned int raycast_subsampling_factor_ = 1; + + cudaStream_t cuda_stream_; +}; + +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/include/nvblox/integrators/impl/integrators_common_impl.h b/nvblox/include/nvblox/integrators/impl/integrators_common_impl.h new file mode 100644 index 00000000..e0a3fe1f --- /dev/null +++ b/nvblox/include/nvblox/integrators/impl/integrators_common_impl.h @@ -0,0 +1,57 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +namespace nvblox { + +template +__host__ std::vector getBlockPtrsFromIndices( + const std::vector& block_indices, + BlockLayer* layer_ptr) { + std::vector block_ptrs; + block_ptrs.reserve(block_indices.size()); + for (const Index3D& block_index : block_indices) { + typename BlockType::Ptr block_ptr = layer_ptr->getBlockAtIndex(block_index); + CHECK(block_ptr); + block_ptrs.push_back(block_ptr.get()); + } + return block_ptrs; +} + +template +__host__ std::vector getBlockPtrsFromIndices( + const std::vector& block_indices, + const BlockLayer& layer) { + std::vector block_ptrs; + block_ptrs.reserve(block_indices.size()); + for (const Index3D& block_index : block_indices) { + const typename BlockType::ConstPtr block_ptr = + layer.getBlockAtIndex(block_index); + CHECK(block_ptr); + block_ptrs.push_back(block_ptr.get()); + } + return block_ptrs; +} + +template +void allocateBlocksWhereRequired(const std::vector& block_indices, + BlockLayer>* layer) { + for (const Index3D& block_index : block_indices) { + layer->allocateBlockAtIndex(block_index); + } +} + +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/include/nvblox/integrators/impl/ray_caster_impl.h b/nvblox/include/nvblox/integrators/impl/ray_caster_impl.h new file mode 100644 index 00000000..004242a0 --- /dev/null +++ b/nvblox/include/nvblox/integrators/impl/ray_caster_impl.h @@ -0,0 +1,90 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/core/indexing.h" + +namespace nvblox { + +__host__ __device__ int inline signum(float x) { + return (x > 0.0f) ? 1 : ((x < 0.0f) ? -1 : 0); +} + +RayCaster::RayCaster(const Vector3f& origin, const Vector3f& destination, + float scale) + : scale_(scale) { + Vector3f start_scaled = origin / scale_; + Vector3f end_scaled = destination / scale_; + + current_index_ = getBlockIndexFromPositionInLayer(scale, origin); + const Index3D end_index = + getBlockIndexFromPositionInLayer(scale, destination); + const Index3D diff_index = end_index - current_index_; + + current_step_ = 0; + + ray_length_in_steps_ = diff_index.cwiseAbs().sum(); + const Vector3f ray_scaled = end_scaled - start_scaled; + + ray_step_signs_ = Index3D(signum(ray_scaled.x()), signum(ray_scaled.y()), + signum(ray_scaled.z())); + + const Index3D corrected_step = ray_step_signs_.cwiseMax(0); + + const Vector3f start_scaled_shifted = + start_scaled - current_index_.cast(); + + Vector3f distance_to_boundaries(corrected_step.cast() - + start_scaled_shifted); + + // NaNs are fine in the next 2 lines. + t_to_next_boundary_ = distance_to_boundaries.cwiseQuotient(ray_scaled); + + // Distance to cross one grid cell along the ray in t. + // Same as absolute inverse value of delta_coord. 
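+  // (Worked example, illustrative: a ray from (0, 0, 0) to (2.5, 0, 0) with
+  // scale 1 has ray_scaled = (2.5, 0, 0). Parameterizing the ray by
+  // t in [0, 1], crossing one grid cell along x costs
+  // t_step_size_.x() = 1 / 2.5 = 0.4, and the caster visits the indices
+  // (0, 0, 0), (1, 0, 0), and (2, 0, 0).)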
+ t_step_size_ = ray_step_signs_.cast().cwiseQuotient(ray_scaled); +} + +bool RayCaster::nextRayIndex(Index3D* ray_index) { + if (current_step_++ > ray_length_in_steps_) { + return false; + } + + DCHECK(ray_index != nullptr); + *ray_index = current_index_; + + int t_min_idx; + t_to_next_boundary_.minCoeff(&t_min_idx); + current_index_[t_min_idx] += ray_step_signs_[t_min_idx]; + t_to_next_boundary_[t_min_idx] += t_step_size_[t_min_idx]; + + return true; +} + +bool RayCaster::nextRayPositionScaled(Vector3f* ray_position) { + Index3D ray_index; + bool success = nextRayIndex(&ray_index); + *ray_position = scale_ * ray_index.cast(); + return success; +} + +void RayCaster::getAllIndices(std::vector* indices) { + indices->clear(); + Index3D next_index; + while (nextRayIndex(&next_index)) { + indices->push_back(next_index); + } +} + +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/include/nvblox/integrators/integrators_common.h b/nvblox/include/nvblox/integrators/integrators_common.h new file mode 100644 index 00000000..e2633f8a --- /dev/null +++ b/nvblox/include/nvblox/integrators/integrators_common.h @@ -0,0 +1,42 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include "nvblox/core/blox.h" +#include "nvblox/core/layer.h" + +namespace nvblox { + +// Convert a list of BlockIndices on host, to a list of device pointers. +template +__host__ std::vector getBlockPtrsFromIndices( + const std::vector& block_indices, + BlockLayer* layer_ptr); + +// Const version +template +__host__ std::vector getBlockPtrsFromIndices( + const std::vector& block_indices, + const BlockLayer& layer_ptr); + +// Allocates blocks in the block_indices list which are not already allocated. +template +void allocateBlocksWhereRequired(const std::vector& block_indices, + BlockLayer>* layer); + +} // namespace nvblox + +#include "nvblox/integrators/impl/integrators_common_impl.h" diff --git a/nvblox/include/nvblox/integrators/projective_color_integrator.h b/nvblox/include/nvblox/integrators/projective_color_integrator.h new file mode 100644 index 00000000..fe1fc1ec --- /dev/null +++ b/nvblox/include/nvblox/integrators/projective_color_integrator.h @@ -0,0 +1,74 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+#pragma once
+
+#include "nvblox/core/common_names.h"
+#include "nvblox/core/layer.h"
+#include "nvblox/integrators/projective_integrator_base.h"
+#include "nvblox/ray_tracing/sphere_tracer.h"
+
+namespace nvblox {
+
+class ProjectiveColorIntegrator : public ProjectiveIntegratorBase {
+ public:
+  ProjectiveColorIntegrator();
+  virtual ~ProjectiveColorIntegrator();
+
+  void finish() const override;
+
+  // Main interface - integrate a ColorImage
+  void integrateFrame(const ColorImage& color_frame, const Transform& T_L_C,
+                      const Camera& camera, const TsdfLayer& tsdf_layer,
+                      ColorLayer* color_layer,
+                      std::vector<Index3D>* updated_blocks = nullptr);
+
+ protected:
+  void updateBlocks(const std::vector<Index3D>& block_indices,
+                    const ColorImage& color_frame,
+                    const DepthImage& depth_frame, const Transform& T_L_C,
+                    const Camera& camera, const float truncation_distance_m,
+                    ColorLayer* layer);
+
+  std::vector<Index3D> reduceBlocksToThoseInTruncationBand(
+      const std::vector<Index3D>& block_indices, const TsdfLayer& tsdf_layer,
+      const float truncation_distance_m);
+
+  // Params
+  int depth_render_ray_subsampling_factor_ = 4;
+
+  // Object to do ray tracing to generate occlusions
+  // TODO(alexmillane): Expose params.
+  SphereTracer sphere_tracer_;
+
+  // Blocks to integrate on the current call and their indices
+  // NOTE(alexmillane): We have one pinned host and one device vector and
+  // transfer between them.
+  device_vector<Index3D> block_indices_device_;
+  device_vector<ColorBlock*> block_ptrs_device_;
+  host_vector<Index3D> block_indices_host_;
+  host_vector<ColorBlock*> block_ptrs_host_;
+
+  // Buffers for getting blocks in truncation band
+  device_vector<const TsdfBlock*> truncation_band_block_ptrs_device_;
+  host_vector<const TsdfBlock*> truncation_band_block_ptrs_host_;
+  device_vector<bool> block_in_truncation_band_device_;
+  host_vector<bool> block_in_truncation_band_host_;
+
+  // CUDA stream to process integration on
+  cudaStream_t integration_stream_;
+};
+
+}  // namespace nvblox
\ No newline at end of file
diff --git a/nvblox/include/nvblox/integrators/projective_integrator_base.h b/nvblox/include/nvblox/integrators/projective_integrator_base.h
new file mode 100644
index 00000000..dad3028b
--- /dev/null
+++ b/nvblox/include/nvblox/integrators/projective_integrator_base.h
@@ -0,0 +1,79 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ +#pragma once + +#include + +#include "nvblox/core/camera.h" +#include "nvblox/core/image.h" +#include "nvblox/core/types.h" +#include "nvblox/integrators/frustum.h" + +namespace nvblox { + +class ProjectiveIntegratorBase { + public: + ProjectiveIntegratorBase(){}; + virtual ~ProjectiveIntegratorBase() {} + + // Getters + float truncation_distance_vox() const; + float max_weight() const; + float max_integration_distance_m() const; + + // Setters + void truncation_distance_vox(float truncation_distance_vox); + void max_weight(float max_weight); + void max_integration_distance_m(float max_integration_distance_m); + + // Scale dependent getter + float truncation_distance_m(float block_size) const; + + // Gets blocks in view (called by sub-classes) + // Using the image: + std::vector getBlocksInView(const DepthImage& depth_frame, + const Transform& T_L_C, + const Camera& camera, + float block_size) const; + // Does not use the image, just uses the AABB. + std::vector getBlocksInView(const Transform& T_L_C, + const Camera& camera, + float block_size) const; + // Using the image and raycasting: + std::vector getBlocksInViewUsingRaycasting( + const DepthImage& depth_frame, const Transform& T_L_C, + const Camera& camera, const float block_size) const; + + // Ensure outstanding operations are finished (relevant for integrators + // launching asynchronous work) + virtual void finish() const = 0; + + const FrustumCalculator& frustum_calculator() const { + return frustum_calculator_; + } + FrustumCalculator& frustum_calculator() { return frustum_calculator_; } + + protected: + // Params + float truncation_distance_vox_ = 4.0f; + float max_weight_ = 100.0f; + float max_integration_distance_m_ = 10.0f; + + // Frustum calculation. + mutable FrustumCalculator frustum_calculator_; +}; + +} // namespace nvblox diff --git a/nvblox/include/nvblox/integrators/projective_tsdf_integrator.h b/nvblox/include/nvblox/integrators/projective_tsdf_integrator.h new file mode 100644 index 00000000..38ee4e93 --- /dev/null +++ b/nvblox/include/nvblox/integrators/projective_tsdf_integrator.h @@ -0,0 +1,61 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#pragma once + +#include "nvblox/core/blox.h" +#include "nvblox/core/camera.h" +#include "nvblox/core/common_names.h" +#include "nvblox/core/image.h" +#include "nvblox/core/layer.h" +#include "nvblox/core/types.h" +#include "nvblox/core/voxels.h" +#include "nvblox/gpu_hash/gpu_layer_view.h" +#include "nvblox/integrators/frustum.h" +#include "nvblox/integrators/projective_integrator_base.h" + +namespace nvblox { + +class ProjectiveTsdfIntegrator : public ProjectiveIntegratorBase { + public: + ProjectiveTsdfIntegrator(); + virtual ~ProjectiveTsdfIntegrator(); + + void integrateFrame(const DepthImage& depth_frame, const Transform& T_L_C, + const Camera& camera, TsdfLayer* layer, + std::vector* updated_blocks = nullptr); + + void finish() const override; + + protected: + virtual void updateBlocks(const std::vector& block_indices, + const DepthImage& depth_frame, + const Transform& T_L_C, const Camera& camera, + const float truncation_distance_m, + TsdfLayer* layer); + + // Blocks to integrate on the current call and their indices + // NOTE(alexmillane): We have one pinned host and one device vector and + // transfer between them. + device_vector block_indices_device_; + device_vector block_ptrs_device_; + host_vector block_indices_host_; + host_vector block_ptrs_host_; + + // CUDA stream to process ingration on + cudaStream_t integration_stream_; +}; + +} // namespace nvblox diff --git a/nvblox/include/nvblox/integrators/ray_caster.h b/nvblox/include/nvblox/integrators/ray_caster.h new file mode 100644 index 00000000..53e8a0b7 --- /dev/null +++ b/nvblox/include/nvblox/integrators/ray_caster.h @@ -0,0 +1,50 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include "nvblox/core/types.h" + +namespace nvblox { + +class RayCaster { + public: + __host__ __device__ inline RayCaster(const Vector3f& origin, + const Vector3f& destination, + float scale = 1.0f); + + // Returns the index, so in "unscaled" coordinates. + __host__ __device__ inline bool nextRayIndex(Index3D* ray_index); + // Returns scaled coordinates. Just the above multiplied by the scale factor. + __host__ __device__ inline bool nextRayPositionScaled(Vector3f* ray_position); + + // Just raycasts over the whole thing and puts them in a vector for you. + __host__ void getAllIndices(std::vector* indices); + + private: + const float scale_ = 1.0f; + + uint32_t ray_length_in_steps_ = 0; + uint32_t current_step_ = 0; + + Vector3f t_to_next_boundary_; + Index3D current_index_; + Index3D ray_step_signs_; + Vector3f t_step_size_; +}; + +} // namespace nvblox + +#include "nvblox/integrators/impl/ray_caster_impl.h" \ No newline at end of file diff --git a/nvblox/include/nvblox/io/csv.h b/nvblox/include/nvblox/io/csv.h new file mode 100644 index 00000000..151108e7 --- /dev/null +++ b/nvblox/include/nvblox/io/csv.h @@ -0,0 +1,35 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include + +#include "nvblox/core/image.h" + +namespace nvblox { +namespace io { + +void writeToCsv(const std::string& filepath, const DepthImage& frame); +void writeToCsv(const std::string& filepath, const ColorImage& frame); + +template +void writeToCsv(const std::string& filepath, + const Eigen::DenseBase& eig); + +} // namespace io +} // namespace nvblox + +#include "nvblox/io/impl/csv_impl.h" \ No newline at end of file diff --git a/nvblox/include/nvblox/io/impl/csv_impl.h b/nvblox/include/nvblox/io/impl/csv_impl.h new file mode 100644 index 00000000..2103d45e --- /dev/null +++ b/nvblox/include/nvblox/io/impl/csv_impl.h @@ -0,0 +1,35 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include + +#include + +namespace nvblox { +namespace io { + +template +void writeToCsv(const std::string& filepath, + const Eigen::DenseBase& eig) { + std::ofstream file_stream(filepath, std::ofstream::out); + const Eigen::IOFormat csv_format(Eigen::StreamPrecision, Eigen::DontAlignCols, + ", ", "\n"); + file_stream << eig.format(csv_format); +} + +} // namespace io +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/include/nvblox/io/impl/pointcloud_io_impl.h b/nvblox/include/nvblox/io/impl/pointcloud_io_impl.h new file mode 100644 index 00000000..ff8a4018 --- /dev/null +++ b/nvblox/include/nvblox/io/impl/pointcloud_io_impl.h @@ -0,0 +1,91 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include "nvblox/core/accessors.h" +#include "nvblox/io/ply_writer.h" + +namespace nvblox { +namespace io { + +/// Outputs a voxel layer as a pointcloud with the lambda function deciding the +/// intensity. +template +bool outputVoxelLayerToPly( + const VoxelBlockLayer& layer, const std::string& filename, + std::function lambda) { + // Create a ply writer object. + io::PlyWriter writer(filename); + + // Combine all the voxels in the mesh into a pointcloud. 
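+  // (Worked example, illustrative: with 5 cm voxels and the default 8
+  // voxels per block side, block_size is 0.4 m, and the voxel at block
+  // index (1, 0, 0), voxel index (0, 0, 0) gets its center at
+  // (0.425, 0.025, 0.025) m.)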
+ std::vector points; + std::vector intensities; + + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + const float block_size = layer.block_size(); + const float voxel_size = layer.voxel_size(); + + auto new_lambda = [&points, &intensities, &block_size, &voxel_size, &lambda]( + const Index3D& block_index, const Index3D& voxel_index, + const VoxelType* voxel) { + float intensity = 0.0f; + if (lambda(voxel, &intensity)) { + points.push_back(getCenterPostionFromBlockIndexAndVoxelIndex( + block_size, block_index, voxel_index)); + intensities.push_back(intensity); + } + }; + + // Call above lambda on every voxel in the layer. + callFunctionOnAllVoxels(layer, new_lambda); + + // Add the pointcloud to the ply writer. + writer.setPoints(&points); + writer.setIntensities(&intensities); + + // Write out the ply. + return writer.write(); +} + +/// Specializations for the TSDF type. +template <> +bool outputVoxelLayerToPly(const TsdfLayer& layer, + const std::string& filename) { + constexpr float kMinWeight = 0.1f; + auto lambda = [&kMinWeight](const TsdfVoxel* voxel, float* distance) -> bool { + *distance = voxel->distance; + return voxel->weight > kMinWeight; + }; + return outputVoxelLayerToPly(layer, filename, lambda); +} + +/// Specialization for the ESDF type. +template <> +bool outputVoxelLayerToPly(const EsdfLayer& layer, + const std::string& filename) { + const float voxel_size = layer.voxel_size(); + auto lambda = [&voxel_size](const EsdfVoxel* voxel, float* distance) -> bool { + *distance = voxel_size * std::sqrt(voxel->squared_distance_vox); + if (voxel->is_inside) { + *distance = -*distance; + } + return voxel->observed; + }; + return outputVoxelLayerToPly(layer, filename, lambda); +} + +} // namespace io +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/include/nvblox/io/mesh_io.h b/nvblox/include/nvblox/io/mesh_io.h new file mode 100644 index 00000000..f7ae9716 --- /dev/null +++ b/nvblox/include/nvblox/io/mesh_io.h @@ -0,0 +1,37 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include + +#include "nvblox/core/layer.h" +#include "nvblox/mesh/mesh_block.h" + +namespace nvblox { +namespace io { + +// Combines mesh blocks in a layer into a single list of vertices, triangles, +// and normals. +void combineMeshBlocks(const BlockLayer& layer, + std::vector* vertices_ptr, + std::vector* normals_ptr, + std::vector* triangles_ptr); + +bool outputMeshLayerToPly(const BlockLayer& layer, + const std::string& filename); + +} // namespace io +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/include/nvblox/io/ply_writer.h b/nvblox/include/nvblox/io/ply_writer.h new file mode 100644 index 00000000..ee69f8a6 --- /dev/null +++ b/nvblox/include/nvblox/io/ply_writer.h @@ -0,0 +1,67 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include +#include + +#include "nvblox/core/color.h" +#include "nvblox/core/types.h" + +namespace nvblox { +namespace io { +/** + * Writes a mesh to a .ply file. For reference on the format, see: + * http://paulbourke.net/dataformats/ply/ + */ +class PlyWriter { + public: + explicit PlyWriter(const std::string& filename) : file_(filename) {} + + ~PlyWriter() { file_.close(); } + + // Add however many of these you want. + void setPoints(const std::vector* points) { points_ = points; } + void setColors(const std::vector* colors) { colors_ = colors; } + void setIntensities(const std::vector* intensities) { + intensities_ = intensities; + } + void setNormals(const std::vector* normals) { normals_ = normals; } + void setTriangles(const std::vector* triangles) { + triangles_ = triangles; + } + + // Call this after points, normals, triangles, etc. have been added to write + // to file. + bool write(); + + private: + void writeHeader(); + void writePoints(); + void writeTriangles(); + + const std::vector* points_ = nullptr; + const std::vector* normals_ = nullptr; + const std::vector* intensities_ = nullptr; + const std::vector* colors_ = nullptr; + const std::vector* triangles_ = nullptr; + + std::ofstream file_; +}; + +} // namespace io + +} // namespace nvblox diff --git a/nvblox/include/nvblox/io/pointcloud_io.h b/nvblox/include/nvblox/io/pointcloud_io.h new file mode 100644 index 00000000..01318728 --- /dev/null +++ b/nvblox/include/nvblox/io/pointcloud_io.h @@ -0,0 +1,52 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include + +#include "nvblox/core/common_names.h" +#include "nvblox/core/layer.h" +#include "nvblox/core/voxels.h" + +namespace nvblox { +namespace io { + +/// Outputs a voxel layer as a pointcloud with the lambda function deciding the +/// intensity. +/// The lambda outputs a boolean, saying whether that voxel should be +/// visualized, and an intensity which will be written to the pointcloud. +template +bool outputVoxelLayerToPly( + const VoxelBlockLayer& layer, const std::string& filename, + std::function lambda); + +/// Without specifying a lambda, this outputs the distance as intensity. +template +bool outputVoxelLayerToPly(const VoxelBlockLayer& layer, + const std::string& filename); + +/// Specializations for the TSDF type. +template <> +bool outputVoxelLayerToPly(const TsdfLayer& layer, const std::string& filename); + +/// Specialization for the ESDF type. 
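
A usage sketch of the lambda-based overload declared above; the weight threshold and function name are illustrative:

#include <string>

#include "nvblox/io/pointcloud_io.h"

bool writeObservedTsdfWeights(const nvblox::TsdfLayer& layer,
                              const std::string& filename) {
  // Emit only reasonably-observed voxels, using the weight as intensity.
  auto lambda = [](const nvblox::TsdfVoxel* voxel, float* intensity) -> bool {
    *intensity = voxel->weight;
    return voxel->weight > 0.5f;
  };
  return nvblox::io::outputVoxelLayerToPly(layer, filename, lambda);
}
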
+template <>
+bool outputVoxelLayerToPly(const EsdfLayer& layer, const std::string& filename);
+
+}  // namespace io
+}  // namespace nvblox
+
+#include "nvblox/io/impl/pointcloud_io_impl.h"
\ No newline at end of file
diff --git a/nvblox/include/nvblox/mesh/impl/marching_cubes_table.h b/nvblox/include/nvblox/mesh/impl/marching_cubes_table.h
new file mode 100644
index 00000000..d06a0d32
--- /dev/null
+++ b/nvblox/include/nvblox/mesh/impl/marching_cubes_table.h
@@ -0,0 +1,334 @@
+// The MIT License (MIT)
+// Copyright (c) 2014 Matthew Klingensmith and Ivan Dryanovski
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.

+#include <cuda_runtime.h>
+#include <cstdint>
+
+namespace nvblox {
+namespace marching_cubes {
+
+// Lookup table from the 256 possible cube configurations from
+// CalculateVertexConfigurationIndex() to the 0-5 triplets that give the edges
+// where the triangle vertices lie (a short sketch of how one row is consumed
+// follows, before the table itself).
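
As referenced above, a sketch of how a consumer might walk one row of the table; in compiling code this would follow the table declaration. The function and argument names are hypothetical, and Eigen types are assumed available via nvblox/core/types.h:

__device__ inline int emitTriangles(int config,
                                    const Eigen::Vector3f edge_vertex[12],
                                    Eigen::Vector3f* vertices_out) {
  const int8_t* row = kTriangleTable[config];
  int n = 0;
  // Each row stores up to 5 triangles as triplets of edge ids (0-11),
  // terminated by -1; 16 entries leave room for 5 * 3 ids plus the sentinel.
  for (int i = 0; row[i] != -1; i += 3) {
    vertices_out[n++] = edge_vertex[row[i]];
    vertices_out[n++] = edge_vertex[row[i + 1]];
    vertices_out[n++] = edge_vertex[row[i + 2]];
  }
  return n;  // At most 15 vertices (5 triangles).
}
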
+__device__ __constant__ const int8_t kTriangleTable[256][16] = { + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 8, 3, 9, 8, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 2, 10, 0, 2, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 8, 3, 2, 10, 8, 10, 9, 8, -1, -1, -1, -1, -1, -1, -1}, + {3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 11, 2, 8, 11, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 9, 0, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 11, 2, 1, 9, 11, 9, 8, 11, -1, -1, -1, -1, -1, -1, -1}, + {3, 10, 1, 11, 10, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 10, 1, 0, 8, 10, 8, 11, 10, -1, -1, -1, -1, -1, -1, -1}, + {3, 9, 0, 3, 11, 9, 11, 10, 9, -1, -1, -1, -1, -1, -1, -1}, + {9, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 3, 0, 7, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 1, 9, 4, 7, 1, 7, 3, 1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 4, 7, 3, 0, 4, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1}, + {9, 2, 10, 9, 0, 2, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1}, + {2, 10, 9, 2, 9, 7, 2, 7, 3, 7, 9, 4, -1, -1, -1, -1}, + {8, 4, 7, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 4, 7, 11, 2, 4, 2, 0, 4, -1, -1, -1, -1, -1, -1, -1}, + {9, 0, 1, 8, 4, 7, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1}, + {4, 7, 11, 9, 4, 11, 9, 11, 2, 9, 2, 1, -1, -1, -1, -1}, + {3, 10, 1, 3, 11, 10, 7, 8, 4, -1, -1, -1, -1, -1, -1, -1}, + {1, 11, 10, 1, 4, 11, 1, 0, 4, 7, 11, 4, -1, -1, -1, -1}, + {4, 7, 8, 9, 0, 11, 9, 11, 10, 11, 0, 3, -1, -1, -1, -1}, + {4, 7, 11, 4, 11, 9, 9, 11, 10, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 4, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 5, 4, 1, 5, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 5, 4, 8, 3, 5, 3, 1, 5, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 8, 1, 2, 10, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1}, + {5, 2, 10, 5, 4, 2, 4, 0, 2, -1, -1, -1, -1, -1, -1, -1}, + {2, 10, 5, 3, 2, 5, 3, 5, 4, 3, 4, 8, -1, -1, -1, -1}, + {9, 5, 4, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 11, 2, 0, 8, 11, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1}, + {0, 5, 4, 0, 1, 5, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1}, + {2, 1, 5, 2, 5, 8, 2, 8, 11, 4, 8, 5, -1, -1, -1, -1}, + {10, 3, 11, 10, 1, 3, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 5, 0, 8, 1, 8, 10, 1, 8, 11, 10, -1, -1, -1, -1}, + {5, 4, 0, 5, 0, 11, 5, 11, 10, 11, 0, 3, -1, -1, -1, -1}, + {5, 4, 8, 5, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1}, + {9, 7, 8, 5, 7, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 3, 0, 9, 5, 3, 5, 7, 3, -1, -1, -1, -1, -1, -1, -1}, + {0, 7, 8, 0, 1, 7, 1, 5, 7, -1, -1, -1, -1, -1, -1, -1}, + {1, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 7, 8, 9, 5, 7, 10, 1, 2, -1, -1, -1, -1, -1, -1, -1}, + {10, 1, 2, 9, 5, 0, 5, 3, 0, 5, 7, 3, -1, -1, -1, -1}, + {8, 0, 2, 8, 2, 5, 8, 5, 7, 10, 5, 2, -1, -1, -1, -1}, + {2, 10, 5, 2, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1}, + {7, 9, 5, 7, 8, 9, 3, 11, 2, -1, -1, 
-1, -1, -1, -1, -1}, + {9, 5, 7, 9, 7, 2, 9, 2, 0, 2, 7, 11, -1, -1, -1, -1}, + {2, 3, 11, 0, 1, 8, 1, 7, 8, 1, 5, 7, -1, -1, -1, -1}, + {11, 2, 1, 11, 1, 7, 7, 1, 5, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 8, 8, 5, 7, 10, 1, 3, 10, 3, 11, -1, -1, -1, -1}, + {5, 7, 0, 5, 0, 9, 7, 11, 0, 1, 0, 10, 11, 10, 0, -1}, + {11, 10, 0, 11, 0, 3, 10, 5, 0, 8, 0, 7, 5, 7, 0, -1}, + {11, 10, 5, 7, 11, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 0, 1, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 8, 3, 1, 9, 8, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1}, + {1, 6, 5, 2, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 6, 5, 1, 2, 6, 3, 0, 8, -1, -1, -1, -1, -1, -1, -1}, + {9, 6, 5, 9, 0, 6, 0, 2, 6, -1, -1, -1, -1, -1, -1, -1}, + {5, 9, 8, 5, 8, 2, 5, 2, 6, 3, 2, 8, -1, -1, -1, -1}, + {2, 3, 11, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 0, 8, 11, 2, 0, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1}, + {5, 10, 6, 1, 9, 2, 9, 11, 2, 9, 8, 11, -1, -1, -1, -1}, + {6, 3, 11, 6, 5, 3, 5, 1, 3, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 11, 0, 11, 5, 0, 5, 1, 5, 11, 6, -1, -1, -1, -1}, + {3, 11, 6, 0, 3, 6, 0, 6, 5, 0, 5, 9, -1, -1, -1, -1}, + {6, 5, 9, 6, 9, 11, 11, 9, 8, -1, -1, -1, -1, -1, -1, -1}, + {5, 10, 6, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 3, 0, 4, 7, 3, 6, 5, 10, -1, -1, -1, -1, -1, -1, -1}, + {1, 9, 0, 5, 10, 6, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1}, + {10, 6, 5, 1, 9, 7, 1, 7, 3, 7, 9, 4, -1, -1, -1, -1}, + {6, 1, 2, 6, 5, 1, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 5, 5, 2, 6, 3, 0, 4, 3, 4, 7, -1, -1, -1, -1}, + {8, 4, 7, 9, 0, 5, 0, 6, 5, 0, 2, 6, -1, -1, -1, -1}, + {7, 3, 9, 7, 9, 4, 3, 2, 9, 5, 9, 6, 2, 6, 9, -1}, + {3, 11, 2, 7, 8, 4, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1}, + {5, 10, 6, 4, 7, 2, 4, 2, 0, 2, 7, 11, -1, -1, -1, -1}, + {0, 1, 9, 4, 7, 8, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1}, + {9, 2, 1, 9, 11, 2, 9, 4, 11, 7, 11, 4, 5, 10, 6, -1}, + {8, 4, 7, 3, 11, 5, 3, 5, 1, 5, 11, 6, -1, -1, -1, -1}, + {5, 1, 11, 5, 11, 6, 1, 0, 11, 7, 11, 4, 0, 4, 11, -1}, + {0, 5, 9, 0, 6, 5, 0, 3, 6, 11, 6, 3, 8, 4, 7, -1}, + {6, 5, 9, 6, 9, 11, 4, 7, 9, 7, 11, 9, -1, -1, -1, -1}, + {10, 4, 9, 6, 4, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 10, 6, 4, 9, 10, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1}, + {10, 0, 1, 10, 6, 0, 6, 4, 0, -1, -1, -1, -1, -1, -1, -1}, + {8, 3, 1, 8, 1, 6, 8, 6, 4, 6, 1, 10, -1, -1, -1, -1}, + {1, 4, 9, 1, 2, 4, 2, 6, 4, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 8, 1, 2, 9, 2, 4, 9, 2, 6, 4, -1, -1, -1, -1}, + {0, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 3, 2, 8, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1}, + {10, 4, 9, 10, 6, 4, 11, 2, 3, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 2, 2, 8, 11, 4, 9, 10, 4, 10, 6, -1, -1, -1, -1}, + {3, 11, 2, 0, 1, 6, 0, 6, 4, 6, 1, 10, -1, -1, -1, -1}, + {6, 4, 1, 6, 1, 10, 4, 8, 1, 2, 1, 11, 8, 11, 1, -1}, + {9, 6, 4, 9, 3, 6, 9, 1, 3, 11, 6, 3, -1, -1, -1, -1}, + {8, 11, 1, 8, 1, 0, 11, 6, 1, 9, 1, 4, 6, 4, 1, -1}, + {3, 11, 6, 3, 6, 0, 0, 6, 4, -1, -1, -1, -1, -1, -1, -1}, + {6, 4, 8, 11, 6, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 10, 6, 7, 8, 10, 8, 9, 10, -1, -1, -1, -1, -1, -1, -1}, + {0, 7, 3, 0, 10, 7, 0, 9, 10, 6, 7, 10, -1, -1, -1, -1}, + {10, 6, 7, 1, 10, 7, 1, 7, 8, 1, 8, 0, -1, -1, -1, -1}, + {10, 6, 7, 10, 7, 1, 1, 7, 3, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 6, 1, 6, 8, 1, 8, 9, 8, 
6, 7, -1, -1, -1, -1}, + {2, 6, 9, 2, 9, 1, 6, 7, 9, 0, 9, 3, 7, 3, 9, -1}, + {7, 8, 0, 7, 0, 6, 6, 0, 2, -1, -1, -1, -1, -1, -1, -1}, + {7, 3, 2, 6, 7, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 3, 11, 10, 6, 8, 10, 8, 9, 8, 6, 7, -1, -1, -1, -1}, + {2, 0, 7, 2, 7, 11, 0, 9, 7, 6, 7, 10, 9, 10, 7, -1}, + {1, 8, 0, 1, 7, 8, 1, 10, 7, 6, 7, 10, 2, 3, 11, -1}, + {11, 2, 1, 11, 1, 7, 10, 6, 1, 6, 7, 1, -1, -1, -1, -1}, + {8, 9, 6, 8, 6, 7, 9, 1, 6, 11, 6, 3, 1, 3, 6, -1}, + {0, 9, 1, 11, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 8, 0, 7, 0, 6, 3, 11, 0, 11, 6, 0, -1, -1, -1, -1}, + {7, 11, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 8, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 9, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 1, 9, 8, 3, 1, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1}, + {10, 1, 2, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 3, 0, 8, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1}, + {2, 9, 0, 2, 10, 9, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1}, + {6, 11, 7, 2, 10, 3, 10, 8, 3, 10, 9, 8, -1, -1, -1, -1}, + {7, 2, 3, 6, 2, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {7, 0, 8, 7, 6, 0, 6, 2, 0, -1, -1, -1, -1, -1, -1, -1}, + {2, 7, 6, 2, 3, 7, 0, 1, 9, -1, -1, -1, -1, -1, -1, -1}, + {1, 6, 2, 1, 8, 6, 1, 9, 8, 8, 7, 6, -1, -1, -1, -1}, + {10, 7, 6, 10, 1, 7, 1, 3, 7, -1, -1, -1, -1, -1, -1, -1}, + {10, 7, 6, 1, 7, 10, 1, 8, 7, 1, 0, 8, -1, -1, -1, -1}, + {0, 3, 7, 0, 7, 10, 0, 10, 9, 6, 10, 7, -1, -1, -1, -1}, + {7, 6, 10, 7, 10, 8, 8, 10, 9, -1, -1, -1, -1, -1, -1, -1}, + {6, 8, 4, 11, 8, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 6, 11, 3, 0, 6, 0, 4, 6, -1, -1, -1, -1, -1, -1, -1}, + {8, 6, 11, 8, 4, 6, 9, 0, 1, -1, -1, -1, -1, -1, -1, -1}, + {9, 4, 6, 9, 6, 3, 9, 3, 1, 11, 3, 6, -1, -1, -1, -1}, + {6, 8, 4, 6, 11, 8, 2, 10, 1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 3, 0, 11, 0, 6, 11, 0, 4, 6, -1, -1, -1, -1}, + {4, 11, 8, 4, 6, 11, 0, 2, 9, 2, 10, 9, -1, -1, -1, -1}, + {10, 9, 3, 10, 3, 2, 9, 4, 3, 11, 3, 6, 4, 6, 3, -1}, + {8, 2, 3, 8, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1}, + {0, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 9, 0, 2, 3, 4, 2, 4, 6, 4, 3, 8, -1, -1, -1, -1}, + {1, 9, 4, 1, 4, 2, 2, 4, 6, -1, -1, -1, -1, -1, -1, -1}, + {8, 1, 3, 8, 6, 1, 8, 4, 6, 6, 10, 1, -1, -1, -1, -1}, + {10, 1, 0, 10, 0, 6, 6, 0, 4, -1, -1, -1, -1, -1, -1, -1}, + {4, 6, 3, 4, 3, 8, 6, 10, 3, 0, 3, 9, 10, 9, 3, -1}, + {10, 9, 4, 6, 10, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 5, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 4, 9, 5, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1}, + {5, 0, 1, 5, 4, 0, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1}, + {11, 7, 6, 8, 3, 4, 3, 5, 4, 3, 1, 5, -1, -1, -1, -1}, + {9, 5, 4, 10, 1, 2, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1}, + {6, 11, 7, 1, 2, 10, 0, 8, 3, 4, 9, 5, -1, -1, -1, -1}, + {7, 6, 11, 5, 4, 10, 4, 2, 10, 4, 0, 2, -1, -1, -1, -1}, + {3, 4, 8, 3, 5, 4, 3, 2, 5, 10, 5, 2, 11, 7, 6, -1}, + {7, 2, 3, 7, 6, 2, 5, 4, 9, -1, -1, -1, -1, -1, -1, -1}, + {9, 5, 4, 0, 8, 6, 0, 6, 2, 6, 8, 7, -1, -1, -1, -1}, + {3, 6, 2, 3, 7, 6, 1, 5, 0, 5, 4, 0, -1, -1, -1, -1}, + {6, 2, 8, 6, 8, 7, 2, 1, 8, 4, 8, 5, 1, 5, 8, -1}, + {9, 5, 4, 10, 1, 6, 1, 7, 6, 1, 3, 7, -1, -1, -1, -1}, + {1, 6, 10, 1, 7, 6, 1, 0, 7, 8, 7, 0, 9, 5, 4, -1}, + {4, 0, 10, 4, 10, 5, 0, 3, 10, 6, 10, 7, 3, 7, 10, -1}, + {7, 6, 10, 7, 10, 8, 5, 4, 10, 4, 8, 10, -1, -1, -1, -1}, + {6, 9, 5, 6, 11, 9, 11, 8, 9, -1, 
-1, -1, -1, -1, -1, -1}, + {3, 6, 11, 0, 6, 3, 0, 5, 6, 0, 9, 5, -1, -1, -1, -1}, + {0, 11, 8, 0, 5, 11, 0, 1, 5, 5, 6, 11, -1, -1, -1, -1}, + {6, 11, 3, 6, 3, 5, 5, 3, 1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 10, 9, 5, 11, 9, 11, 8, 11, 5, 6, -1, -1, -1, -1}, + {0, 11, 3, 0, 6, 11, 0, 9, 6, 5, 6, 9, 1, 2, 10, -1}, + {11, 8, 5, 11, 5, 6, 8, 0, 5, 10, 5, 2, 0, 2, 5, -1}, + {6, 11, 3, 6, 3, 5, 2, 10, 3, 10, 5, 3, -1, -1, -1, -1}, + {5, 8, 9, 5, 2, 8, 5, 6, 2, 3, 8, 2, -1, -1, -1, -1}, + {9, 5, 6, 9, 6, 0, 0, 6, 2, -1, -1, -1, -1, -1, -1, -1}, + {1, 5, 8, 1, 8, 0, 5, 6, 8, 3, 8, 2, 6, 2, 8, -1}, + {1, 5, 6, 2, 1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 3, 6, 1, 6, 10, 3, 8, 6, 5, 6, 9, 8, 9, 6, -1}, + {10, 1, 0, 10, 0, 6, 9, 5, 0, 5, 6, 0, -1, -1, -1, -1}, + {0, 3, 8, 5, 6, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {10, 5, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 5, 10, 7, 5, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {11, 5, 10, 11, 7, 5, 8, 3, 0, -1, -1, -1, -1, -1, -1, -1}, + {5, 11, 7, 5, 10, 11, 1, 9, 0, -1, -1, -1, -1, -1, -1, -1}, + {10, 7, 5, 10, 11, 7, 9, 8, 1, 8, 3, 1, -1, -1, -1, -1}, + {11, 1, 2, 11, 7, 1, 7, 5, 1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 1, 2, 7, 1, 7, 5, 7, 2, 11, -1, -1, -1, -1}, + {9, 7, 5, 9, 2, 7, 9, 0, 2, 2, 11, 7, -1, -1, -1, -1}, + {7, 5, 2, 7, 2, 11, 5, 9, 2, 3, 2, 8, 9, 8, 2, -1}, + {2, 5, 10, 2, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1}, + {8, 2, 0, 8, 5, 2, 8, 7, 5, 10, 2, 5, -1, -1, -1, -1}, + {9, 0, 1, 5, 10, 3, 5, 3, 7, 3, 10, 2, -1, -1, -1, -1}, + {9, 8, 2, 9, 2, 1, 8, 7, 2, 10, 2, 5, 7, 5, 2, -1}, + {1, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 7, 0, 7, 1, 1, 7, 5, -1, -1, -1, -1, -1, -1, -1}, + {9, 0, 3, 9, 3, 5, 5, 3, 7, -1, -1, -1, -1, -1, -1, -1}, + {9, 8, 7, 5, 9, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {5, 8, 4, 5, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1}, + {5, 0, 4, 5, 11, 0, 5, 10, 11, 11, 3, 0, -1, -1, -1, -1}, + {0, 1, 9, 8, 4, 10, 8, 10, 11, 10, 4, 5, -1, -1, -1, -1}, + {10, 11, 4, 10, 4, 5, 11, 3, 4, 9, 4, 1, 3, 1, 4, -1}, + {2, 5, 1, 2, 8, 5, 2, 11, 8, 4, 5, 8, -1, -1, -1, -1}, + {0, 4, 11, 0, 11, 3, 4, 5, 11, 2, 11, 1, 5, 1, 11, -1}, + {0, 2, 5, 0, 5, 9, 2, 11, 5, 4, 5, 8, 11, 8, 5, -1}, + {9, 4, 5, 2, 11, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 5, 10, 3, 5, 2, 3, 4, 5, 3, 8, 4, -1, -1, -1, -1}, + {5, 10, 2, 5, 2, 4, 4, 2, 0, -1, -1, -1, -1, -1, -1, -1}, + {3, 10, 2, 3, 5, 10, 3, 8, 5, 4, 5, 8, 0, 1, 9, -1}, + {5, 10, 2, 5, 2, 4, 1, 9, 2, 9, 4, 2, -1, -1, -1, -1}, + {8, 4, 5, 8, 5, 3, 3, 5, 1, -1, -1, -1, -1, -1, -1, -1}, + {0, 4, 5, 1, 0, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 4, 5, 8, 5, 3, 9, 0, 5, 0, 3, 5, -1, -1, -1, -1}, + {9, 4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 11, 7, 4, 9, 11, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1}, + {0, 8, 3, 4, 9, 7, 9, 11, 7, 9, 10, 11, -1, -1, -1, -1}, + {1, 10, 11, 1, 11, 4, 1, 4, 0, 7, 4, 11, -1, -1, -1, -1}, + {3, 1, 4, 3, 4, 8, 1, 10, 4, 7, 4, 11, 10, 11, 4, -1}, + {4, 11, 7, 9, 11, 4, 9, 2, 11, 9, 1, 2, -1, -1, -1, -1}, + {9, 7, 4, 9, 11, 7, 9, 1, 11, 2, 11, 1, 0, 8, 3, -1}, + {11, 7, 4, 11, 4, 2, 2, 4, 0, -1, -1, -1, -1, -1, -1, -1}, + {11, 7, 4, 11, 4, 2, 8, 3, 4, 3, 2, 4, -1, -1, -1, -1}, + {2, 9, 10, 2, 7, 9, 2, 3, 7, 7, 4, 9, -1, -1, -1, -1}, + {9, 10, 7, 9, 7, 4, 10, 2, 7, 8, 7, 0, 2, 0, 7, -1}, + {3, 7, 10, 3, 10, 2, 7, 4, 10, 1, 10, 0, 4, 0, 10, -1}, + {1, 10, 2, 8, 7, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 9, 1, 4, 1, 7, 7, 1, 3, -1, -1, -1, -1, -1, -1, 
-1}, + {4, 9, 1, 4, 1, 7, 0, 8, 1, 8, 7, 1, -1, -1, -1, -1}, + {4, 0, 3, 7, 4, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 8, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 9, 3, 9, 11, 11, 9, 10, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 10, 0, 10, 8, 8, 10, 11, -1, -1, -1, -1, -1, -1, -1}, + {3, 1, 10, 11, 3, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 2, 11, 1, 11, 9, 9, 11, 8, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 9, 3, 9, 11, 1, 2, 9, 2, 11, 9, -1, -1, -1, -1}, + {0, 2, 11, 8, 0, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 2, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 3, 8, 2, 8, 10, 10, 8, 9, -1, -1, -1, -1, -1, -1, -1}, + {9, 10, 2, 0, 9, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 3, 8, 2, 8, 10, 0, 1, 8, 1, 10, 8, -1, -1, -1, -1}, + {1, 10, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 3, 8, 9, 1, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 9, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 3, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}; + +// Lookup table mapping the number of vertices in the table above. +__device__ __constant__ const uint8_t kNumVertsTable[256] = { + 0, 3, 3, 6, 3, 6, 6, 9, 3, 6, 6, 9, 6, 9, 9, 6, 3, 6, 6, + 9, 6, 9, 9, 12, 6, 9, 9, 12, 9, 12, 12, 9, 3, 6, 6, 9, 6, 9, + 9, 12, 6, 9, 9, 12, 9, 12, 12, 9, 6, 9, 9, 6, 9, 12, 12, 9, 9, + 12, 12, 9, 12, 15, 15, 6, 3, 6, 6, 9, 6, 9, 9, 12, 6, 9, 9, 12, + 9, 12, 12, 9, 6, 9, 9, 12, 9, 12, 12, 15, 9, 12, 12, 15, 12, 15, 15, + 12, 6, 9, 9, 12, 9, 12, 6, 9, 9, 12, 12, 15, 12, 15, 9, 6, 9, 12, + 12, 9, 12, 15, 9, 6, 12, 15, 15, 12, 15, 6, 12, 3, 3, 6, 6, 9, 6, + 9, 9, 12, 6, 9, 9, 12, 9, 12, 12, 9, 6, 9, 9, 12, 9, 12, 12, 15, + 9, 6, 12, 9, 12, 9, 15, 6, 6, 9, 9, 12, 9, 12, 12, 15, 9, 12, 12, + 15, 12, 15, 15, 12, 9, 12, 12, 9, 12, 15, 15, 12, 12, 9, 15, 6, 15, 12, + 6, 3, 6, 9, 9, 12, 9, 12, 12, 15, 9, 12, 12, 15, 6, 9, 9, 6, 9, + 12, 12, 15, 12, 15, 15, 6, 12, 9, 15, 12, 9, 6, 12, 3, 9, 12, 12, 15, + 12, 15, 9, 12, 12, 15, 15, 6, 9, 12, 6, 3, 6, 9, 9, 6, 9, 12, 6, + 3, 9, 6, 12, 3, 6, 3, 3, 0, +}; + +// Number of triangles for each case +__device__ __constant__ const uint8_t kNumTrianglesTable[256] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 2, 1, 2, 2, 3, 2, 3, 3, 4, + 2, 3, 3, 4, 3, 4, 4, 3, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 3, + 2, 3, 3, 2, 3, 4, 4, 3, 3, 4, 4, 3, 4, 5, 5, 2, 1, 2, 2, 3, 2, 3, 3, 4, + 2, 3, 3, 4, 3, 4, 4, 3, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 4, + 2, 3, 3, 4, 3, 4, 2, 3, 3, 4, 4, 5, 4, 5, 3, 2, 3, 4, 4, 3, 4, 5, 3, 2, + 4, 5, 5, 4, 5, 2, 4, 1, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 3, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 2, 4, 3, 4, 3, 5, 2, 2, 3, 3, 4, 3, 4, 4, 5, + 3, 4, 4, 5, 4, 5, 5, 4, 3, 4, 4, 3, 4, 5, 5, 4, 4, 3, 5, 2, 5, 4, 2, 1, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 2, 3, 3, 2, 3, 4, 4, 5, 4, 5, 5, 2, + 4, 3, 5, 4, 3, 2, 4, 1, 3, 4, 4, 5, 4, 5, 3, 4, 4, 5, 5, 2, 3, 4, 2, 1, + 2, 3, 3, 2, 3, 4, 2, 1, 3, 2, 4, 1, 2, 1, 1, 0}; + +// Lookup table from the 12 cube edge indices to their corresponding corner +// indices. +__device__ __constant__ const uint8_t kEdgeIndexPairs[12][2] = { + {0, 1}, {1, 2}, {2, 3}, {3, 0}, {4, 5}, {5, 6}, + {6, 7}, {7, 4}, {0, 4}, {1, 5}, {2, 6}, {3, 7}}; + +// Lookup table for voxel neighbors. 
+__device__ __constant__ const uint8_t kCornerIndexOffsets[8][3] = { + {0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, + {0, 0, 1}, {1, 0, 1}, {1, 1, 1}, {0, 1, 1}}; + +} // namespace marching_cubes +} // namespace nvblox diff --git a/nvblox/include/nvblox/mesh/marching_cubes.h b/nvblox/include/nvblox/mesh/marching_cubes.h new file mode 100644 index 00000000..a6714b3b --- /dev/null +++ b/nvblox/include/nvblox/mesh/marching_cubes.h @@ -0,0 +1,90 @@ +// The MIT License (MIT) +// Copyright (c) 2014 Matthew Klingensmith and Ivan Dryanovski +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include + +#include "nvblox/core/types.h" +#include "nvblox/mesh/mesh_block.h" + +namespace nvblox { +namespace marching_cubes { + +// This (internal) struct contains intermediate results of marching cubes. +struct PerVoxelMarchingCubesResults { + // The 3D positions of the corners of a 2x2x2 cube of voxels formed by the + // surrounding voxel neighbours in the positive direction of each coordinate. + Vector3f vertex_coords[8]; + // The value of the TSDF at each of the neighbouring voxels described above. + float vertex_sdf[8]; + // Does this voxel contain a mesh? (Does it straddle a zero level set?) + bool contains_mesh = false; + // The index into the marching cubes triangle table (found in + // nvblox/mesh/impl/marching_cubes_table.h). This index is determined based on + // the tsdf configuration (+/-) of the surrounding voxels and is the main + // contribution of marching cubes algorithm. + uint8_t marching_cubes_table_index = 0; + // At the end of marching cubes, vertices calculated for this and other voxels + // in this MeshBlock are stored in a single vector. This member indicates where + // in this vertex vector the vertices associated with this voxel begin. It is + // calculated through an exclusive prefix sum of the numbers of vertices + // in each voxel of this MeshBlock. + int vertex_vector_start_index; +}; + +// Performs the marching cubes algorithm to generate a mesh layer from a TSDF. +// Implementation taken heavily from Open Chisel +// https://github.com/personalrobotics/OpenChisel + +// Calculate the vertex configuration of a given set of neighbor distances. +__host__ __device__ int calculateVertexConfiguration(const float vertex_sdf[8]); + +// This is for blocks access in the kernel. 
+__host__ __device__ int neighborIndexFromDirection(const Index3D& direction); +__host__ __device__ Index3D directionFromNeighborIndex(const int index); + +// Output (edge coords) is 12 long. Should be preallocated. +__host__ __device__ void interpolateEdgeVertices( + const PerVoxelMarchingCubesResults& marching_cubes_results, + Eigen::Matrix* edge_coords); + +/// Performs linear interpolation on two cube corners to find the approximate +/// zero crossing (surface) value. +__host__ __device__ Vector3f interpolateVertex(const Vector3f& vertex1, + const Vector3f& vertex2, + float sdf1, float sdf2); + +// Actually populate the mesh block. +__host__ void meshCube( + const PerVoxelMarchingCubesResults& marching_cubes_results, + MeshBlock* mesh); + +__device__ void calculateOutputIndex( + PerVoxelMarchingCubesResults* marching_cubes_results, int* size); + +__device__ void calculateVertices( + const PerVoxelMarchingCubesResults& marching_cubes_results, + CudaMeshBlock* mesh); + +} // namespace marching_cubes +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/include/nvblox/mesh/mesh.h b/nvblox/include/nvblox/mesh/mesh.h new file mode 100644 index 00000000..4e00d9c1 --- /dev/null +++ b/nvblox/include/nvblox/mesh/mesh.h @@ -0,0 +1,39 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include +#include +#include + +namespace nvblox { + +// A structure which holds a Mesh. +// Generally produced as the result of fusing MeshBlocks in a Layer +// into a single mesh. +// NOTE(alexmillane): Currently only on the CPU. +struct Mesh { + // Data + std::vector vertices; + std::vector normals; + std::vector triangles; + std::vector colors; + + // Factory + static Mesh fromLayer(const BlockLayer& layer); +}; + +} // namespace nvblox diff --git a/nvblox/include/nvblox/mesh/mesh_block.h b/nvblox/include/nvblox/mesh/mesh_block.h new file mode 100644 index 00000000..56726366 --- /dev/null +++ b/nvblox/include/nvblox/mesh/mesh_block.h @@ -0,0 +1,86 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include + +#include "nvblox/core/blox.h" +#include "nvblox/core/color.h" +#include "nvblox/core/types.h" +#include "nvblox/core/unified_ptr.h" +#include "nvblox/core/unified_vector.h" + +namespace nvblox { + +// A mesh block containing all of the triangles from this block. +// Each block contains only the UPPER part of its neighbors: i.e., the max +// x, y, and z axes. Its neighbors are responsible for the rest. 
+struct MeshBlock { + typedef std::shared_ptr Ptr; + typedef std::shared_ptr ConstPtr; + + MeshBlock(MemoryType memory_type = MemoryType::kDevice); + + // Mesh Data + // These unified vectors contain the mesh data for this block. Note that + // Colors and/or intensities are optional. The "triangles" vector is a vector + // of indices into the vertices vector. Triplets of consecutive elements form + // triangles with the indexed vertices as their corners. + unified_vector vertices; + unified_vector normals; + unified_vector colors; + unified_vector intensities; + unified_vector triangles; + + void clear(); + + // Resize and reserve. + void resizeToNumberOfVertices(size_t new_size); + void reserveNumberOfVertices(size_t new_capacity); + + size_t size() const; + size_t capacity() const; + + // Resize colors/intensities such that: + // `colors.size()/intensities.size() == vertices.size()` + void expandColorsToMatchVertices(); + void expandIntensitiesToMatchVertices(); + + // Copy mesh data to the CPU. + std::vector getVertexVectorOnCPU() const; + std::vector getNormalVectorOnCPU() const; + std::vector getTriangleVectorOnCPU() const; + std::vector getColorVectorOnCPU() const; + + // Note(alexmillane): Memory type ignored, MeshBlocks live in CPU memory. + static Ptr allocate(MemoryType memory_type); +}; + +// Helper struct for mesh blocks on CUDA. +// NOTE: We need this because we cant pass MeshBlock to kernel functions because +// of the presence of unified_vector members. +struct CudaMeshBlock { + CudaMeshBlock() = default; + CudaMeshBlock(MeshBlock* block); + + Vector3f* vertices; + Vector3f* normals; + int* triangles; + Color* colors; + int size; +}; + +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/include/nvblox/mesh/mesh_integrator.h b/nvblox/include/nvblox/mesh/mesh_integrator.h new file mode 100644 index 00000000..e17852e6 --- /dev/null +++ b/nvblox/include/nvblox/mesh/mesh_integrator.h @@ -0,0 +1,133 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include "nvblox/core/common_names.h" +#include "nvblox/core/layer.h" +#include "nvblox/mesh/marching_cubes.h" +#include "nvblox/mesh/mesh_block.h" + +namespace nvblox { + +class MeshIntegrator { + public: + MeshIntegrator(); + ~MeshIntegrator(); + + /// Chooses the default mesher between CPU and GPU. + bool integrateMeshFromDistanceField( + const TsdfLayer& distance_layer, BlockLayer* mesh_layer, + const DeviceType device_type = DeviceType::kGPU); + + /// Integrates only the selected blocks from the distance layer. + bool integrateBlocksCPU(const TsdfLayer& distance_layer, + const std::vector& block_indices, + BlockLayer* mesh_layer); + + bool integrateBlocksGPU(const TsdfLayer& distance_layer, + const std::vector& block_indices, + BlockLayer* mesh_layer); + + // Color mesh layer. + // TODO(alexmillane): Currently these functions color vertices by taking the + // CLOSEST color. Would be good to have an option at least for interpolation. 
+ void colorMesh(const ColorLayer& color_layer, MeshLayer* mesh_layer); + void colorMesh(const ColorLayer& color_layer, + const std::vector& block_indices, + MeshLayer* mesh_layer); + void colorMeshGPU(const ColorLayer& color_layer, MeshLayer* mesh_layer); + void colorMeshGPU(const ColorLayer& color_layer, + const std::vector& block_indices, + MeshLayer* mesh_layer); + void colorMeshCPU(const ColorLayer& color_layer, MeshLayer* mesh_layer); + void colorMeshCPU(const ColorLayer& color_layer, + const std::vector& block_indices, + MeshLayer* mesh_layer); + + float min_weight() const { return min_weight_; } + float& min_weight() { return min_weight_; } + + bool weld_vertices() const { return weld_vertices_; } + bool& weld_vertices() { return weld_vertices_; } + + private: + bool isBlockMeshable(const VoxelBlock::ConstPtr block, + float cutoff) const; + + bool getTriangleCandidatesAroundVoxel( + const VoxelBlock::ConstPtr block, + const std::vector::ConstPtr>& neighbor_blocks, + const Index3D& index, const Vector3f& voxel_position, + const float voxel_size, + marching_cubes::PerVoxelMarchingCubesResults* neighbors); + + void getTriangleCandidatesInBlock( + const VoxelBlock::ConstPtr block, + const std::vector::ConstPtr>& neighbor_blocks, + const Index3D& block_index, const float block_size, + std::vector* + triangle_candidates); + + void getMeshableBlocksGPU(const TsdfLayer& distance_layer, + const std::vector& block_indices, + float cutoff_distance, + std::vector* meshable_blocks); + + void meshBlocksGPU(const TsdfLayer& distance_layer, + const std::vector& block_indices, + BlockLayer* mesh_layer); + + void weldVertices(const std::vector& block_indices, + BlockLayer* mesh_layer); + + // Minimum weight to actually mesh. + float min_weight_ = 1e-4; + + /// Whether to perform vertex welding or not. It's slow but cuts down number + /// of vertices by 5x. + bool weld_vertices_ = false; + + // Offsets for cube indices. + Eigen::Matrix cube_index_offsets_; + + // The color that the mesh takes if no coloring is available. + Color default_mesh_color_ = Color::Gray(); + + // State. + cudaStream_t cuda_stream_ = nullptr; + + // These are temporary variables so we don't have to allocate every single + // frame. + host_vector*> block_ptrs_host_; + device_vector*> block_ptrs_device_; + host_vector meshable_host_; + device_vector meshable_device_; + host_vector block_positions_host_; + device_vector block_positions_device_; + host_vector mesh_blocks_host_; + device_vector mesh_blocks_device_; + // Caches for welding. + device_vector input_vertices_; + device_vector input_normals_; + + // Intermediate marching cube results. + device_vector + marching_cubes_results_device_; + device_vector mesh_block_sizes_device_; + host_vector mesh_block_sizes_host_; +}; + +} // namespace nvblox diff --git a/nvblox/include/nvblox/nvblox.h b/nvblox/include/nvblox/nvblox.h new file mode 100644 index 00000000..bb13f1ab --- /dev/null +++ b/nvblox/include/nvblox/nvblox.h @@ -0,0 +1,28 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include "nvblox/core/blox.h" +#include "nvblox/core/common_names.h" +#include "nvblox/core/mapper.h" +#include "nvblox/core/layer.h" +#include "nvblox/core/voxels.h" +#include "nvblox/integrators/esdf_integrator.h" +#include "nvblox/integrators/projective_color_integrator.h" +#include "nvblox/integrators/projective_integrator_base.h" +#include "nvblox/integrators/projective_tsdf_integrator.h" +#include "nvblox/io/csv.h" +#include "nvblox/mesh/mesh_integrator.h" diff --git a/nvblox/include/nvblox/primitives/impl/scene_impl.h b/nvblox/include/nvblox/primitives/impl/scene_impl.h new file mode 100644 index 00000000..be00ec60 --- /dev/null +++ b/nvblox/include/nvblox/primitives/impl/scene_impl.h @@ -0,0 +1,112 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +---- Original voxblox license, which this file is heavily based on: ---- +Copyright (c) 2016, ETHZ ASL +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of voxblox nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +#pragma once + +#include "nvblox/core/accessors.h" +#include "nvblox/core/bounding_boxes.h" +#include "nvblox/core/indexing.h" +#include "nvblox/core/types.h" +#include "nvblox/core/voxels.h" +#include "nvblox/utils/timing.h" + +namespace nvblox { +namespace primitives { + +template <> +inline void Scene::setVoxel(float dist, TsdfVoxel* voxel) const { + voxel->distance = dist; + voxel->weight = 1.0f; // Just to make sure it gets visualized/meshed/etc. 
+} + +template +void Scene::generateSdfFromScene(float max_dist, + VoxelBlockLayer* layer) const { + CHECK(layer->memory_type() == MemoryType::kUnified) + << "For scene generation the layer must be CPU accessible " + "(MemoryType::kUnified)."; + timing::Timer sim_timer("primitives/generate_sdf"); + + CHECK_NOTNULL(layer); + + float block_size = layer->block_size(); + + // First allocate all the blocks within the AABB. + std::vector block_indices = + getBlockIndicesTouchedByBoundingBox(block_size, aabb_); + + for (const Index3D& block_index : block_indices) { + layer->allocateBlockAtIndex(block_index); + } + + // Iterate over every voxel in the layer and compute its distance to all + // objects. + auto lambda = [&block_size, &max_dist, this](const Index3D& block_index, + const Index3D& voxel_index, + VoxelType* voxel) { + const Vector3f position = getCenterPostionFromBlockIndexAndVoxelIndex( + block_size, block_index, voxel_index); + + if (!aabb_.contains(position)) { + return; + } + + // Iterate over all objects and get distances to this thing. + // Only computes up to max_distance away from the voxel (to reduce amount + // of ray casting). + float voxel_dist = getSignedDistanceToPoint(position, max_dist); + + // Then update the thing. + // Also truncate the distance *inside* the obstacle, which is not truncated + // by the previous function. + voxel_dist = std::max(voxel_dist, -max_dist); + setVoxel(voxel_dist, voxel); + }; + + // Call above lambda on every voxel in the layer. + callFunctionOnAllVoxels(layer, lambda); +} + +} // namespace primitives +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/include/nvblox/primitives/primitives.h b/nvblox/include/nvblox/primitives/primitives.h new file mode 100644 index 00000000..d532914c --- /dev/null +++ b/nvblox/include/nvblox/primitives/primitives.h @@ -0,0 +1,169 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +---- Original voxblox license, which this file is heavily based on: ---- +Copyright (c) 2016, ETHZ ASL +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of voxblox nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +#pragma once + +#include + +#include "nvblox/core/color.h" +#include "nvblox/core/types.h" + +namespace nvblox { +namespace primitives { + +/// Base class for primitive objects. +class Primitive { + public: + enum class Type { kPlane, kCube, kSphere, kCylinder }; + + // Epsilon for ray intersection and computation. + static constexpr float kEpsilon = 1e-4; + + Primitive(const Vector3f& center, Type type) + : Primitive(center, type, Color::White()) {} + Primitive(const Vector3f& center, Type type, const Color& color) + : center_(center), type_(type), color_(color) {} + virtual ~Primitive() {} + + /// Map-building accessors. + virtual float getDistanceToPoint(const Vector3f& point) const = 0; + + Color getColor() const { return color_; } + Type getType() const { return type_; } + + /// Raycasting accessors. + virtual bool getRayIntersection(const Vector3f& ray_origin, + const Vector3f& ray_direction, float max_dist, + Vector3f* intersect_point, + float* intersect_dist) const = 0; + + protected: + Vector3f center_; + Type type_; + Color color_; +}; + +class Sphere : public Primitive { + public: + Sphere(const Vector3f& center, float radius) + : Primitive(center, Type::kSphere), radius_(radius) {} + Sphere(const Vector3f& center, float radius, const Color& color) + : Primitive(center, Type::kSphere, color), radius_(radius) {} + + virtual float getDistanceToPoint(const Vector3f& point) const override; + + virtual bool getRayIntersection(const Vector3f& ray_origin, + const Vector3f& ray_direction, float max_dist, + Vector3f* intersect_point, + float* intersect_dist) const override; + + protected: + float radius_; +}; + +class Cube : public Primitive { + public: + Cube(const Vector3f& center, const Vector3f& size) + : Primitive(center, Type::kCube), size_(size) {} + Cube(const Vector3f& center, const Vector3f& size, const Color& color) + : Primitive(center, Type::kCube, color), size_(size) {} + + virtual float getDistanceToPoint(const Vector3f& point) const override; + virtual bool getRayIntersection(const Vector3f& ray_origin, + const Vector3f& ray_direction, float max_dist, + Vector3f* intersect_point, + float* intersect_dist) const override; + + protected: + Vector3f size_; +}; + +/// Requires normal being passed in to ALREADY BE NORMALIZED!!!! +class Plane : public Primitive { + public: + Plane(const Vector3f& center, const Vector3f& normal) + : Primitive(center, Type::kPlane), normal_(normal) { + CHECK_NEAR(normal.norm(), 1.0, 1e-3); + } + Plane(const Vector3f& center, const Vector3f& normal, const Color& color) + : Primitive(center, Type::kPlane, color), normal_(normal) { + CHECK_NEAR(normal.norm(), 1.0, 1e-3); + } + + virtual float getDistanceToPoint(const Vector3f& point) const override; + + virtual bool getRayIntersection(const Vector3f& ray_origin, + const Vector3f& ray_direction, float max_dist, + Vector3f* intersect_point, + float* intersect_dist) const override; + + protected: + Vector3f normal_; +}; + +/// Cylinder centered on the XY plane! 
+class Cylinder : public Primitive { + public: + Cylinder(const Vector3f& center, float radius, float height) + : Primitive(center, Type::kCylinder), radius_(radius), height_(height) {} + Cylinder(const Vector3f& center, float radius, float height, + const Color& color) + : Primitive(center, Type::kCylinder, color), + radius_(radius), + height_(height) {} + + virtual float getDistanceToPoint(const Vector3f& point) const override; + + virtual bool getRayIntersection(const Vector3f& ray_origin, + const Vector3f& ray_direction, float max_dist, + Vector3f* intersect_point, + float* intersect_dist) const override; + + protected: + float radius_; + float height_; +}; + +} // namespace primitives +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/include/nvblox/primitives/scene.h b/nvblox/include/nvblox/primitives/scene.h new file mode 100644 index 00000000..57a9bead --- /dev/null +++ b/nvblox/include/nvblox/primitives/scene.h @@ -0,0 +1,118 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +---- Original voxblox license, which this file is heavily based on: ---- +Copyright (c) 2016, ETHZ ASL +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of voxblox nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ +#pragma once + +#include +#include + +#include "nvblox/core/blox.h" +#include "nvblox/core/camera.h" +#include "nvblox/core/image.h" +#include "nvblox/core/layer.h" +#include "nvblox/primitives/primitives.h" + +namespace nvblox { +namespace primitives { + +class Scene { + public: + Scene(); + + /// === Creating an environment === + void addPrimitive(std::unique_ptr primitive); + + /// Convenience functions for setting up bounded areas. + void addGroundLevel(float height); + void addCeiling(float height); + + /// Add 4 walls (infinite planes) bounding the space. In case this is not the + /// desired behavior, can use addObject to add walls manually one by one. + /// If infinite walls are undesirable, then use cubes. + void addPlaneBoundaries(float x_min, float x_max, float y_min, float y_max); + + /// Deletes all objects! + void clear(); + + /// === Generating synthetic data from environment === + /// Generates a synthetic view given camera parameters and a transformation + /// of the camera to the scene. + void generateDepthImageFromScene(const Camera& camera, const Transform& T_S_C, + float max_dist, + DepthImage* depth_frame) const; + + /// === Computing ground truth SDFs === + template + void generateSdfFromScene(float max_dist, + VoxelBlockLayer* layer) const; + + /// Computes distance to an arbitrary point across all objects. + /// Positive distance is computed only up to max_dist, though negative + /// distance may be smaller than -max_dist. + float getSignedDistanceToPoint(const Vector3f& coords, float max_dist) const; + + /// Get the intersection of a ray with the first hit object in the scene. + bool getRayIntersection(const Vector3f& ray_origin, + const Vector3f& ray_direction, float max_dist, + Vector3f* ray_intersection, float* ray_dist) const; + + const AxisAlignedBoundingBox& aabb() const { return aabb_; } + AxisAlignedBoundingBox& aabb() { return aabb_; } + + protected: + template + inline void setVoxel(float dist, VoxelType* voxel) const; + + /// Vector storing pointers to all the objects in this world. + std::vector> primitives_; + + /// World boundaries... Can be changed arbitrarily, just sets ground truth + /// generation and visualization bounds, accurate only up to block size. + AxisAlignedBoundingBox aabb_; +}; + +} // namespace primitives +} // namespace nvblox + +#include "nvblox/primitives/impl/scene_impl.h" diff --git a/nvblox/include/nvblox/ray_tracing/sphere_tracer.h b/nvblox/include/nvblox/ray_tracing/sphere_tracer.h new file mode 100644 index 00000000..9516de2f --- /dev/null +++ b/nvblox/include/nvblox/ray_tracing/sphere_tracer.h @@ -0,0 +1,74 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#pragma once + +#include +#include + +#include "nvblox/core/camera.h" +#include "nvblox/core/common_names.h" +#include "nvblox/core/image.h" +#include "nvblox/gpu_hash/gpu_layer_view.h" + +namespace nvblox { + +struct Ray { + Vector3f direction; + Vector3f origin; +}; + +class SphereTracer { + public: + struct Params { + Params() {} + + int maximum_steps = 100; + float maximum_ray_length_m = 15.0f; + float surface_distance_epsilon_vox = 0.1f; + }; + + SphereTracer(Params params = Params()); + ~SphereTracer(); + + // Render an image + // We return a pointer to the buffer on GPU where the image is rendered. + // Note that additional calls to this function will change the contents of the + // pointed-to image. + std::shared_ptr renderImageOnGPU( + const Camera& camera, const Transform& T_S_C, const TsdfLayer& tsdf_layer, + const float truncation_distance_m, + const MemoryType output_image_memory_type = MemoryType::kDevice, + const int ray_subsampling_factor = 1); + + // NOTE(alexmillane): This starts a single threaded kernel to cast the ray + // and so is not efficient. Intended for testing rather than production. + bool castOnGPU(const Ray& ray, const TsdfLayer& tsdf_layer, + const float truncation_distance_m, float* t) const; + + const Params& params() const { return params_; } + Params& params() { return params_; } + + private: + // Device working space + float* t_device_; + bool* success_flag_device_; + std::shared_ptr depth_image_; + + Params params_; + cudaStream_t tracing_stream_; +}; + +} // namespace nvblox diff --git a/nvblox/include/nvblox/utils/nvtx_ranges.h b/nvblox/include/nvblox/utils/nvtx_ranges.h new file mode 100644 index 00000000..6be20dc3 --- /dev/null +++ b/nvblox/include/nvblox/utils/nvtx_ranges.h @@ -0,0 +1,55 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#pragma once + +#include + +#include + +#include "nvblox/core/color.h" + +namespace nvblox { +namespace timing { + +class NvtxRange { + public: + NvtxRange(const std::string& message, const Color& color, + bool constructed_stopped = false); + NvtxRange(const std::string& message, bool constructed_stopped = false); + ~NvtxRange(); + + NvtxRange() = delete; + NvtxRange(const NvtxRange& other) = delete; + + void Start(); + void Stop(); + + bool Started() const { return started_; }; + + private: + void Init(const std::string& message, const Color& color); + void Init(const std::string& message, const uint32_t color); + + bool started_; + nvtxEventAttributes_t event_attributes_; + nvtxRangeId_t id_; +}; + +void mark(const std::string& message, const Color& color); +void mark(const std::string& message); + +} // namespace timing +} // namespace nvblox diff --git a/nvblox/include/nvblox/utils/timing.h b/nvblox/include/nvblox/utils/timing.h new file mode 100644 index 00000000..8286e66e --- /dev/null +++ b/nvblox/include/nvblox/utils/timing.h @@ -0,0 +1,225 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +---- Original license for this file: ---- + * Copyright (C) 2012-2013 Simon Lynen, ASL, ETH Zurich, Switzerland + * You can contact the author at + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* Adapted from Paul Furgale Schweizer Messer sm_timing */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "nvblox/utils/nvtx_ranges.h" + +namespace nvblox { +namespace timing { + +// Forward declaration +class TimerChrono; +class TimerNvtx; +class DummyTimer; + +// The project wide typedef defining the type of timer to be used +typedef TimerNvtx Timer; + +template +class Accumulator { + public: + Accumulator() + : window_samples_(0), + totalsamples_(0), + window_sum_(0), + sum_(0), + min_(std::numeric_limits::max()), + max_(std::numeric_limits::min()) {} + + void Add(T sample) { + if (window_samples_ < N) { + samples_[window_samples_++] = sample; + window_sum_ += sample; + } else { + T& oldest = samples_[window_samples_++ % N]; + window_sum_ += sample - oldest; + oldest = sample; + } + sum_ += sample; + ++totalsamples_; + if (sample > max_) { + max_ = sample; + } + if (sample < min_) { + min_ = sample; + } + } + + int TotalSamples() const { return totalsamples_; } + + double Sum() const { return sum_; } + + double Mean() const { return sum_ / totalsamples_; } + + double RollingMean() const { + return window_sum_ / std::min(window_samples_, N); + } + + double Max() const { return max_; } + + double Min() const { return min_; } + + double LazyVariance() const { + if (window_samples_ == 0) { + return 0.0; + } + double var = 0; + double mean = RollingMean(); + for (int i = 0; i < std::min(window_samples_, N); ++i) { + var += (samples_[i] - mean) * (samples_[i] - mean); + } + var /= std::min(window_samples_, N); + return var; + } + + private: + int window_samples_; + int totalsamples_; + Total window_sum_; + Total sum_; + T min_; + T max_; + T samples_[N]; +}; + +struct TimerMapValue { + TimerMapValue() {} + + /// Create an accumulator with specified window size. + Accumulator acc_; +}; + +/** + * A class that has the timer interface but does nothing. Swapping this in in + * place of the Timer class (say with a typedef) should allow one to disable + * timing. Because all of the functions are inline, they should just disappear. + */ +class DummyTimer { + public: + explicit DummyTimer(size_t /*handle*/, bool /*constructStopped*/ = false) {} + explicit DummyTimer(std::string const& /*tag*/, + bool /*constructStopped*/ = false) {} + ~DummyTimer() {} + + void Start() {} + void Stop() {} + bool IsTiming() { return false; } +}; + +class TimerChrono { + public: + explicit TimerChrono(size_t handle, bool constructStopped = false); + explicit TimerChrono(std::string const& tag, bool constructStopped = false); + ~TimerChrono(); + + void Start(); + void Stop(); + bool IsTiming() const; + + private: + std::chrono::time_point time_; + + bool timing_; + size_t handle_; +}; + +class TimerNvtx { + public: + explicit TimerNvtx(std::string const& tag, const Color& color, + bool constructStopped = false); + explicit TimerNvtx(std::string const& tag, bool constructStopped = false); + + void Start(); + void Stop(); + bool IsTiming() const; + + private: + NvtxRange nvtx_range_; + TimerChrono timer_; +}; + +class Timing { + public: + typedef std::map map_t; + friend class TimerChrono; + // Definition of static functions to query the timers. 
+ static size_t GetHandle(std::string const& tag); + static std::string GetTag(size_t handle); + static double GetTotalSeconds(size_t handle); + static double GetTotalSeconds(std::string const& tag); + static double GetMeanSeconds(size_t handle); + static double GetMeanSeconds(std::string const& tag); + static size_t GetNumSamples(size_t handle); + static size_t GetNumSamples(std::string const& tag); + static double GetVarianceSeconds(size_t handle); + static double GetVarianceSeconds(std::string const& tag); + static double GetMinSeconds(size_t handle); + static double GetMinSeconds(std::string const& tag); + static double GetMaxSeconds(size_t handle); + static double GetMaxSeconds(std::string const& tag); + static double GetHz(size_t handle); + static double GetHz(std::string const& tag); + static void Print(std::ostream& out); + static std::string Print(); + static std::string SecondsToTimeString(double seconds); + static void Reset(); + static const map_t& GetTimers() { return Instance().tagMap_; } + + private: + void AddTime(size_t handle, double seconds); + + static Timing& Instance(); + + Timing(); + ~Timing(); + + typedef std::vector list_t; + + list_t timers_; + map_t tagMap_; + size_t maxTagLength_; + std::mutex mutex_; +}; + +} // namespace timing +} // namespace nvblox diff --git a/nvblox/src/benchmarks/sphere_benchmark.cpp b/nvblox/src/benchmarks/sphere_benchmark.cpp new file mode 100644 index 00000000..7a2a48b7 --- /dev/null +++ b/nvblox/src/benchmarks/sphere_benchmark.cpp @@ -0,0 +1,179 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include +#include + +#include "nvblox/core/accessors.h" +#include "nvblox/core/camera.h" +#include "nvblox/core/common_names.h" +#include "nvblox/core/cuda/warmup.h" +#include "nvblox/core/image.h" +#include "nvblox/core/layer.h" +#include "nvblox/integrators/esdf_integrator.h" +#include "nvblox/integrators/projective_tsdf_integrator.h" +#include "nvblox/io/mesh_io.h" +#include "nvblox/io/ply_writer.h" +#include "nvblox/io/pointcloud_io.h" +#include "nvblox/mesh/mesh_block.h" +#include "nvblox/mesh/mesh_integrator.h" +#include "nvblox/primitives/scene.h" +#include "nvblox/utils/timing.h" + +namespace nvblox { + +class SphereBenchmark { + public: + SphereBenchmark(); + + void runBenchmark(const std::string& csv_output_path = ""); + bool outputMesh(const std::string& ply_output_path); + + private: + // Settings. Do not modify or the benchmark isn't comparable. + static constexpr float kVoxelSize = 0.05; + static constexpr float kBlockSize = + VoxelBlock::kVoxelsPerSide * kVoxelSize; + static constexpr int kNumTrajectoryPoints = 80; + static constexpr float kSphereRadius = 2.0f; + static constexpr float kTrajectoryRadius = 4.0f; + static constexpr float kMaxEnvironmentDimension = 5.0f; + + // Actual layers. + TsdfLayer tsdf_layer_; + EsdfLayer esdf_layer_; + MeshLayer mesh_layer_; + + // Simulated camera. 
+ constexpr static float fu_ = 300; + constexpr static float fv_ = 300; + constexpr static int width_ = 640; + constexpr static int height_ = 480; + constexpr static float cu_ = static_cast(width_) / 2.0f; + constexpr static float cv_ = static_cast(height_) / 2.0f; + Camera camera_; +}; + +SphereBenchmark::SphereBenchmark() + : tsdf_layer_(kVoxelSize, MemoryType::kDevice), + esdf_layer_(kVoxelSize, MemoryType::kUnified), + mesh_layer_(kBlockSize, MemoryType::kUnified), + camera_(Camera(fu_, fv_, cu_, cv_, width_, height_)) {} + +void SphereBenchmark::runBenchmark(const std::string& csv_output_path) { + // Create an integrator with default settings. + ProjectiveTsdfIntegrator integrator; + MeshIntegrator mesh_integrator; + EsdfIntegrator esdf_integrator; + esdf_integrator.max_distance_m() = 4.0; + + // Scene is bounded to the dimensions above. + primitives::Scene scene; + scene.aabb() = AxisAlignedBoundingBox( + Vector3f(-kMaxEnvironmentDimension, -kMaxEnvironmentDimension, 0.0f), + Vector3f(kMaxEnvironmentDimension, kMaxEnvironmentDimension, + kMaxEnvironmentDimension)); + // Create a scene with a ground plane and a sphere. + scene.addGroundLevel(0.0f); + scene.addCeiling(kMaxEnvironmentDimension); + scene.addPrimitive(std::make_unique( + Vector3f(0.0f, 0.0f, kSphereRadius), kSphereRadius)); + // Add bounding planes at 5 meters. Basically makes it sphere in a box. + scene.addPlaneBoundaries(-kMaxEnvironmentDimension, kMaxEnvironmentDimension, + -kMaxEnvironmentDimension, kMaxEnvironmentDimension); + + // Simulate a trajectory of the requisite amount of points, on the circle + // around the sphere. + const float radians_increment = 2 * M_PI / (kNumTrajectoryPoints); + + // Create a depth frame. We share this memory buffer for the entire + // trajectory. + DepthImage depth_frame(camera_.height(), camera_.width(), + MemoryType::kUnified); + + for (size_t i = 0; i < kNumTrajectoryPoints; i++) { + const float theta = radians_increment * i; + // Convert polar to cartesian coordinates. + Vector3f cartesian_coordinates(kTrajectoryRadius * std::cos(theta), + kTrajectoryRadius * std::sin(theta), + kSphereRadius); + // The camera has its z axis pointing towards the origin. + Eigen::Quaternionf rotation_base(0.5, 0.5, 0.5, 0.5); + Eigen::Quaternionf rotation_theta( + Eigen::AngleAxisf(M_PI + theta, Vector3f::UnitZ())); + + // Construct a transform from camera to scene with this. + Transform T_S_C = Transform::Identity(); + T_S_C.prerotate(rotation_theta * rotation_base); + T_S_C.pretranslate(cartesian_coordinates); + + // Generate a depth image of the scene. + scene.generateDepthImageFromScene( + camera_, T_S_C, 2 * kMaxEnvironmentDimension, &depth_frame); + + std::vector updated_blocks; + // Integrate this depth image. + { + timing::Timer integration_timer("benchmark/integrate_tsdf"); + integrator.integrateFrame(depth_frame, T_S_C, camera_, &tsdf_layer_, + &updated_blocks); + } + + // Integrate the mesh. + { + timing::Timer mesh_timer("benchmark/integrate_mesh"); + mesh_integrator.integrateBlocksGPU(tsdf_layer_, updated_blocks, + &mesh_layer_); + } + + // Integrate the ESDF. 
+ { + timing::Timer esdf_timer("benchmark/integrate_esdf"); + esdf_integrator.integrateBlocksOnGPU(tsdf_layer_, updated_blocks, + &esdf_layer_); + } + } +} + +bool SphereBenchmark::outputMesh(const std::string& ply_output_path) { + timing::Timer timer_write("mesh/write"); + return io::outputMeshLayerToPly(mesh_layer_, ply_output_path); +} + +} // namespace nvblox + +int main(int argc, char* argv[]) { + google::InitGoogleLogging(argv[0]); + FLAGS_alsologtostderr = true; + google::InstallFailureSignalHandler(); + + nvblox::warmupCuda(); + + std::string output_mesh_path = ""; + if (argc >= 2) { + output_mesh_path = argv[1]; + } + + nvblox::SphereBenchmark benchmark; + benchmark.runBenchmark(""); + + if (!output_mesh_path.empty()) { + benchmark.outputMesh(output_mesh_path); + } + + std::cout << nvblox::timing::Timing::Print(); + + return 0; +} diff --git a/nvblox/src/core/bounding_boxes.cpp b/nvblox/src/core/bounding_boxes.cpp new file mode 100644 index 00000000..6752035f --- /dev/null +++ b/nvblox/src/core/bounding_boxes.cpp @@ -0,0 +1,73 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/core/bounding_boxes.h" + +namespace nvblox { + +AxisAlignedBoundingBox getAABBOfObservedVoxels(const EsdfLayer& layer) { + AxisAlignedBoundingBox aabb; + auto lambda = [&aabb, &layer](const Index3D& block_index, + const Index3D& voxel_index, + const EsdfVoxel* voxel) -> void { + if (voxel->observed) { + Vector3f p = getPositionFromBlockIndexAndVoxelIndex( + layer.block_size(), block_index, voxel_index); + aabb.extend(p); + p += layer.block_size() * Vector3f::Ones(); + aabb.extend(p); + } + }; + callFunctionOnAllVoxels(layer, lambda); + return aabb; +} + +AxisAlignedBoundingBox getAABBOfObservedVoxels(const TsdfLayer& layer, + const float min_weight) { + AxisAlignedBoundingBox aabb; + auto lambda = [&aabb, &layer, min_weight](const Index3D& block_index, + const Index3D& voxel_index, + const TsdfVoxel* voxel) -> void { + if (voxel->weight > min_weight) { + Vector3f p = getPositionFromBlockIndexAndVoxelIndex( + layer.block_size(), block_index, voxel_index); + aabb.extend(p); + p += layer.block_size() * Vector3f::Ones(); + aabb.extend(p); + } + }; + callFunctionOnAllVoxels(layer, lambda); + return aabb; +} + +AxisAlignedBoundingBox getAABBOfObservedVoxels(const ColorLayer& layer, + const float min_weight) { + AxisAlignedBoundingBox aabb; + auto lambda = [&aabb, &layer, min_weight](const Index3D& block_index, + const Index3D& voxel_index, + const ColorVoxel* voxel) -> void { + if (voxel->weight > min_weight) { + Vector3f p = getPositionFromBlockIndexAndVoxelIndex( + layer.block_size(), block_index, voxel_index); + aabb.extend(p); + p += layer.block_size() * Vector3f::Ones(); + aabb.extend(p); + } + }; + callFunctionOnAllVoxels(layer, lambda); + return aabb; +} + +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/src/core/camera.cpp b/nvblox/src/core/camera.cpp new file mode 100644 index 00000000..97d6e898 --- /dev/null +++ b/nvblox/src/core/camera.cpp @@ 
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "nvblox/core/camera.h"
+
+namespace nvblox {
+
+AxisAlignedBoundingBox Camera::getViewAABB(const Transform& T_L_C,
+                                           const float min_depth,
+                                           const float max_depth) const {
+  // Get the bounding corners of this view.
+  Eigen::Matrix<float, 8, 3> corners_C = getViewCorners(min_depth, max_depth);
+
+  Vector3f aabb_min, aabb_max;
+  aabb_min.setConstant(std::numeric_limits<float>::max());
+  aabb_max.setConstant(std::numeric_limits<float>::lowest());
+
+  // Transform it into the layer coordinate frame.
+  for (int i = 0; i < corners_C.rows(); i++) {
+    const Vector3f& corner_C = corners_C.row(i);
+    Vector3f corner_L = T_L_C * corner_C;
+    for (int j = 0; j < 3; j++) {
+      aabb_min(j) = std::min(aabb_min(j), corner_L(j));
+      aabb_max(j) = std::max(aabb_max(j), corner_L(j));
+    }
+  }
+
+  return AxisAlignedBoundingBox(aabb_min, aabb_max);
+}
+
+Frustum Camera::getViewFrustum(const Transform& T_L_C, const float min_depth,
+                               const float max_depth) const {
+  return Frustum(*this, T_L_C, min_depth, max_depth);
+}
+
+Eigen::Matrix<float, 8, 3> Camera::getViewCorners(const float min_depth,
+                                                  const float max_depth) const {
+  // Rays through the corners of the image plane
+  // Clockwise from the top left corner of the image.
+  const Vector3f ray_0_C =
+      rayFromImagePlaneCoordinates(Vector2f(0.0f, 0.0f));      // NOLINT
+  const Vector3f ray_1_C =
+      rayFromImagePlaneCoordinates(Vector2f(width_, 0.0f));    // NOLINT
+  const Vector3f ray_2_C =
+      rayFromImagePlaneCoordinates(Vector2f(width_, height_)); // NOLINT
+  const Vector3f ray_3_C =
+      rayFromImagePlaneCoordinates(Vector2f(0.0f, height_));   // NOLINT
+
+  // True bounding box from the 3D points
+  Eigen::Matrix<float, 8, 3> corners_C;
+  corners_C.row(0) = min_depth * ray_2_C;
+  corners_C.row(1) = min_depth * ray_1_C;
+  corners_C.row(2) = min_depth * ray_0_C;
+  corners_C.row(3) = min_depth * ray_3_C;
+  corners_C.row(4) = max_depth * ray_2_C;
+  corners_C.row(5) = max_depth * ray_1_C;
+  corners_C.row(6) = max_depth * ray_0_C;
+  corners_C.row(7) = max_depth * ray_3_C;
+  return corners_C;
+}
+
+// Frustum definitions.
+Frustum::Frustum(const Camera& camera, const Transform& T_L_C, float min_depth,
+                 float max_depth) {
+  Eigen::Matrix<float, 8, 3> corners_C =
+      camera.getViewCorners(min_depth, max_depth);
+  computeBoundingPlanes(corners_C, T_L_C);
+}
+void Frustum::computeBoundingPlanes(const Eigen::Matrix<float, 8, 3>& corners_C,
+                                    const Transform& T_L_C) {
+  // Transform the corners.
+  const Eigen::Matrix<float, 8, 3> corners_L =
+      (T_L_C * corners_C.transpose()).transpose();
+
+  // Near plane first.
+  bounding_planes_L_[0].setFromPoints(corners_L.row(0), corners_L.row(2),
+                                      corners_L.row(1));
+  // Far plane.
+  bounding_planes_L_[1].setFromPoints(corners_L.row(4), corners_L.row(5),
+                                      corners_L.row(6));
+  // Left.
+  bounding_planes_L_[2].setFromPoints(corners_L.row(3), corners_L.row(7),
+                                      corners_L.row(6));
+  // Right.
+  bounding_planes_L_[3].setFromPoints(corners_L.row(0), corners_L.row(5),
+                                      corners_L.row(4));
+  // Top.
+  bounding_planes_L_[4].setFromPoints(corners_L.row(3), corners_L.row(4),
+                                      corners_L.row(7));
+  // Bottom.
+  bounding_planes_L_[5].setFromPoints(corners_L.row(2), corners_L.row(6),
+                                      corners_L.row(5));
+
+  // Calculate AABB.
+  Vector3f aabb_min, aabb_max;
+
+  aabb_min.setConstant(std::numeric_limits<float>::max());
+  aabb_max.setConstant(std::numeric_limits<float>::lowest());
+
+  for (int i = 0; i < corners_L.cols(); i++) {
+    for (int j = 0; j < corners_L.rows(); j++) {
+      aabb_min(i) = std::min(aabb_min(i), corners_L(j, i));
+      aabb_max(i) = std::max(aabb_max(i), corners_L(j, i));
+    }
+  }
+
+  aabb_ = AxisAlignedBoundingBox(aabb_min, aabb_max);
+}
+
+bool Frustum::isPointInView(const Vector3f& point) const {
+  // Skip the AABB check; assume it has already been done.
+  for (size_t i = 0; i < bounding_planes_L_.size(); i++) {
+    if (!bounding_planes_L_[i].isPointInside(point)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool Frustum::isAABBInView(const AxisAlignedBoundingBox& aabb) const {
+  // If we're not even close, don't bother checking the planes.
+  if (!aabb_.intersects(aabb)) {
+    return false;
+  }
+  constexpr int kNumCorners = 8;
+
+  // Check the center of the bounding box to see if it's within the frustum.
+  // This covers a corner case where the given AABB is larger than the
+  // frustum.
+  if (isPointInView(aabb.center())) {
+    return true;
+  }
+
+  // Iterate over all the corners of the bounding box and see if any are
+  // within the view frustum.
+  for (int i = 0; i < kNumCorners; i++) {
+    if (isPointInView(
+            aabb.corner(static_cast<AxisAlignedBoundingBox::CornerType>(i)))) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Bounding plane definitions.
+void BoundingPlane::setFromPoints(const Vector3f& p1, const Vector3f& p2,
+                                  const Vector3f& p3) {
+  Vector3f p1p2 = p2 - p1;
+  Vector3f p1p3 = p3 - p1;
+
+  Vector3f cross = p1p2.cross(p1p3);
+  normal_ = cross.normalized();
+  distance_ = normal_.dot(p1);
+}
+
+void BoundingPlane::setFromDistanceNormal(const Vector3f& normal,
+                                          float distance) {
+  normal_ = normal;
+  distance_ = distance;
+}
+
+bool BoundingPlane::isPointInside(const Vector3f& point) const {
+  return point.dot(normal_) >= distance_;
+}
+
+}  // namespace nvblox
diff --git a/nvblox/src/core/color.cpp b/nvblox/src/core/color.cpp
new file mode 100644
index 00000000..3132cc3c
--- /dev/null
+++ b/nvblox/src/core/color.cpp
@@ -0,0 +1,38 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ +#include "nvblox/core/color.h" + +namespace nvblox { + +Color Color::blendTwoColors(const Color& first_color, float first_weight, + const Color& second_color, float second_weight) { + float total_weight = first_weight + second_weight; + + first_weight /= total_weight; + second_weight /= total_weight; + + Color new_color; + new_color.r = static_cast(std::round( + first_color.r * first_weight + second_color.r * second_weight)); + new_color.g = static_cast(std::round( + first_color.g * first_weight + second_color.g * second_weight)); + new_color.b = static_cast(std::round( + first_color.b * first_weight + second_color.b * second_weight)); + + return new_color; +} + +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/src/core/cuda/blox.cu b/nvblox/src/core/cuda/blox.cu new file mode 100644 index 00000000..b5872055 --- /dev/null +++ b/nvblox/src/core/cuda/blox.cu @@ -0,0 +1,44 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/core/blox.h" +#include "nvblox/core/common_names.h" + +namespace nvblox { + +// Must be called with: +// - a single block +// - one thread per voxel +__global__ void setColorBlockGray(ColorBlock* block_device_ptr) { + ColorVoxel* voxel_ptr = + &block_device_ptr->voxels[threadIdx.z][threadIdx.y][threadIdx.x]; + voxel_ptr->color.r = 127; + voxel_ptr->color.g = 127; + voxel_ptr->color.b = 127; + voxel_ptr->weight = 0.0f; +} + +void setColorBlockGrayOnGPU(ColorBlock* block_device_ptr) { + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + const dim3 kThreadsPerBlock(kVoxelsPerSide, kVoxelsPerSide, kVoxelsPerSide); + setColorBlockGray<<<1, kThreadsPerBlock>>>(block_device_ptr); + // NOTE(alexmillane): At the moment we launch this allocation on the default + // stream which implicitly synchronizes. At some point in the future we should + // probably move this to a stream. + // checkCudaErrors(cudaDeviceSynchronize()); + checkCudaErrors(cudaPeekAtLastError()); +} + +} // namespace nvblox diff --git a/nvblox/src/core/cuda/error_check.cu b/nvblox/src/core/cuda/error_check.cu new file mode 100644 index 00000000..0d4bc34d --- /dev/null +++ b/nvblox/src/core/cuda/error_check.cu @@ -0,0 +1,34 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#include "nvblox/core/cuda/error_check.cuh" + +#include + +namespace nvblox { + +void check_cuda(cudaError_t result, char const* const func, + const char* const file, int const line) { + if (result) { + std::cerr << "CUDA error = " << static_cast(result) << " at " + << file << ":" << line << " '" << func + << "'. Error string: " << cudaGetErrorString(result) << ".\n"; + // Make sure we call CUDA Device Reset before exiting + cudaDeviceReset(); + exit(99); + } +} + +} // namespace nvblox diff --git a/nvblox/src/core/cuda/image_cuda.cu b/nvblox/src/core/cuda/image_cuda.cu new file mode 100644 index 00000000..51e3a861 --- /dev/null +++ b/nvblox/src/core/cuda/image_cuda.cu @@ -0,0 +1,51 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/core/cuda/image_cuda.h" + +#include +#include +#include + +#include "nvblox/core/cuda/error_check.cuh" + +namespace nvblox { +namespace cuda { + +float max(const int rows, const int cols, const float* image) { + const thrust::device_ptr dev_ptr(image); + const thrust::device_ptr max_elem = + thrust::max_element(thrust::device, dev_ptr, dev_ptr + (rows * cols)); + return *max_elem; +} + +float min(const int rows, const int cols, const float* image) { + const thrust::device_ptr dev_ptr(image); + const thrust::device_ptr min_elem = + thrust::min_element(thrust::device, dev_ptr, dev_ptr + (rows * cols)); + return *min_elem; +} + +std::pair minmax(const int rows, const int cols, + const float* image) { + // Wrap our memory and reduce using thrust + const thrust::device_ptr dev_ptr(image); + const auto minmax_elem = + thrust::minmax_element(thrust::device, dev_ptr, dev_ptr + (rows * cols)); + return {*minmax_elem.first, *minmax_elem.second}; +} + +} // namespace cuda +} // namespace nvblox diff --git a/nvblox/src/core/cuda/warmup.cu b/nvblox/src/core/cuda/warmup.cu new file mode 100644 index 00000000..4a3cb861 --- /dev/null +++ b/nvblox/src/core/cuda/warmup.cu @@ -0,0 +1,32 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#include "nvblox/core/cuda/warmup.h" + +namespace nvblox { + +__global__ void warm_up_gpu() { + unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x; + float ia, ib; + ia = ib = 0.0f; + ib += ia + tid ; +} + +void warmupCuda() { + warm_up_gpu<<<64, 128>>>(); + cudaDeviceSynchronize(); +} + +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/src/core/image.cpp b/nvblox/src/core/image.cpp new file mode 100644 index 00000000..8f9f74db --- /dev/null +++ b/nvblox/src/core/image.cpp @@ -0,0 +1,71 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/core/image.h" + +#include + +#include "nvblox/core/cuda/image_cuda.h" +#include "nvblox/io/csv.h" + +namespace nvblox { +namespace image { + +float max(const DepthImage& image) { return maxGPU(image); } + +float min(const DepthImage& image) { return minGPU(image); } + +std::pair minmax(const DepthImage& image) { + return minmaxGPU(image); +} + +float maxGPU(const DepthImage& image) { + return cuda::max(image.rows(), image.cols(), image.dataConstPtr()); +} + +float minGPU(const DepthImage& image) { + return cuda::min(image.rows(), image.cols(), image.dataConstPtr()); +} + +std::pair minmaxGPU(const DepthImage& image) { + const auto minmax_elem = + cuda::minmax(image.rows(), image.cols(), image.dataConstPtr()); + return {minmax_elem.first, minmax_elem.second}; +} + +float maxCPU(const DepthImage& image) { + CHECK(image.memory_type() == MemoryType::kUnified) + << "CPU function called on kDevice image."; + return *std::max_element(image.dataConstPtr(), + image.dataConstPtr() + image.numel()); +} + +float minCPU(const DepthImage& image) { + CHECK(image.memory_type() == MemoryType::kUnified) + << "CPU function called on kDevice image."; + return *std::min_element(image.dataConstPtr(), + image.dataConstPtr() + image.numel()); +} + +std::pair minmaxCPU(const DepthImage& image) { + CHECK(image.memory_type() == MemoryType::kUnified) + << "CPU function called on kDevice image."; + const auto res = std::minmax_element(image.dataConstPtr(), + image.dataConstPtr() + image.numel()); + return {*res.first, *res.second}; +} + +} // namespace image +} // namespace nvblox diff --git a/nvblox/src/core/interpolation_3d.cpp b/nvblox/src/core/interpolation_3d.cpp new file mode 100644 index 00000000..ccb1fc7d --- /dev/null +++ b/nvblox/src/core/interpolation_3d.cpp @@ -0,0 +1,57 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#include "nvblox/core/interpolation_3d.h" + +#include + +namespace nvblox { +namespace interpolation { + +bool interpolateOnCPU(const Vector3f& p_L, const TsdfLayer& layer, + float* distance_ptr) { + CHECK_NOTNULL(distance_ptr); + CHECK(layer.memory_type() == MemoryType::kUnified) + << "For CPU-based interpolation, the layer must be CPU accessible (ie " + "MemoryType::kUnified)."; + auto get_distance_lambda = [](const TsdfVoxel& voxel) -> float { + return voxel.distance; + }; + constexpr float kMinWeight = 1e-4; + auto voxel_valid_lambda = [](const TsdfVoxel& voxel) -> bool { + return voxel.weight > kMinWeight; + }; + return internal::interpolateMemberOnCPU( + p_L, layer, get_distance_lambda, voxel_valid_lambda, distance_ptr); +} + +bool interpolateOnCPU(const Vector3f& p_L, const EsdfLayer& layer, + float* distance_ptr) { + CHECK_NOTNULL(distance_ptr); + CHECK(layer.memory_type() == MemoryType::kUnified) + << "For CPU-based interpolation, the layer must be CPU accessible (ie " + "MemoryType::kUnified)."; + auto get_distance_lambda = [](const EsdfVoxel& voxel) -> float { + return std::sqrt(voxel.squared_distance_vox); + }; + auto voxel_valid_lambda = [](const EsdfVoxel& voxel) -> bool { + return voxel.observed; + }; + return internal::interpolateMemberOnCPU( + p_L, layer, get_distance_lambda, voxel_valid_lambda, distance_ptr); +} + +} // namespace interpolation +} // namespace nvblox diff --git a/nvblox/src/core/mapper.cpp b/nvblox/src/core/mapper.cpp new file mode 100644 index 00000000..1b691462 --- /dev/null +++ b/nvblox/src/core/mapper.cpp @@ -0,0 +1,90 @@ +#include "nvblox/core/mapper.h" + +namespace nvblox { + +RgbdMapper::RgbdMapper(float voxel_size_m, MemoryType memory_type) + : voxel_size_m_(voxel_size_m) { + layers_ = LayerCake::create( + voxel_size_m_, memory_type); +} + +void RgbdMapper::integrateDepth(const DepthImage& depth_frame, + const Transform& T_L_C, const Camera& camera) { + // Call the integrator. + std::vector updated_blocks; + tsdf_integrator_.integrateFrame(depth_frame, T_L_C, camera, + layers_.getPtr(), &updated_blocks); + + // Update all the relevant queues. + mesh_blocks_to_update_.insert(updated_blocks.begin(), updated_blocks.end()); + esdf_blocks_to_update_.insert(updated_blocks.begin(), updated_blocks.end()); +} + +void RgbdMapper::integrateColor(const ColorImage& color_frame, + const Transform& T_L_C, const Camera& camera) { + color_integrator_.integrateFrame(color_frame, T_L_C, camera, + layers_.get(), + layers_.getPtr()); +} + +std::vector RgbdMapper::updateMesh() { + // Convert the set of MeshBlocks needing an update to a vector + std::vector mesh_blocks_to_update_vector( + mesh_blocks_to_update_.begin(), mesh_blocks_to_update_.end()); + + // Call the integrator. + mesh_integrator_.integrateBlocksGPU(layers_.get(), + mesh_blocks_to_update_vector, + layers_.getPtr()); + + mesh_integrator_.colorMesh(layers_.get(), + mesh_blocks_to_update_vector, + layers_.getPtr()); + + // Mark blocks as updated + mesh_blocks_to_update_.clear(); + + return mesh_blocks_to_update_vector; +} + +std::vector RgbdMapper::updateEsdf() { + CHECK(esdf_mode_ != EsdfMode::k2D) + << "Currently, we limit computation of the ESDF to 2d *or* 3d. 
Not both."; + esdf_mode_ = EsdfMode::k3D; + + // Convert the set of EsdfBlocks needing an update to a vector + std::vector esdf_blocks_to_update_vector( + esdf_blocks_to_update_.begin(), esdf_blocks_to_update_.end()); + + esdf_integrator_.integrateBlocksOnGPU(layers_.get(), + esdf_blocks_to_update_vector, + layers_.getPtr()); + + // Mark blocks as updated + esdf_blocks_to_update_.clear(); + + return esdf_blocks_to_update_vector; +} + +std::vector RgbdMapper::updateEsdfSlice(float slice_input_z_min, + float slice_input_z_max, + float slice_output_z) { + CHECK(esdf_mode_ != EsdfMode::k3D) + << "Currently, we limit computation of the ESDF to 2d *or* 3d. Not both."; + esdf_mode_ = EsdfMode::k2D; + + // Convert the set of MeshBlocks needing an update to a vector + std::vector esdf_blocks_to_update_vector( + esdf_blocks_to_update_.begin(), esdf_blocks_to_update_.end()); + + esdf_integrator_.integrateSliceOnGPU( + layers_.get(), esdf_blocks_to_update_vector, slice_input_z_min, + slice_input_z_max, slice_output_z, layers_.getPtr()); + + // Mark blocks as updated + esdf_blocks_to_update_.clear(); + + return esdf_blocks_to_update_vector; +} + +} // namespace nvblox diff --git a/nvblox/src/datasets/image_loader.cpp b/nvblox/src/datasets/image_loader.cpp new file mode 100644 index 00000000..af952e2c --- /dev/null +++ b/nvblox/src/datasets/image_loader.cpp @@ -0,0 +1,101 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/datasets/image_loader.h" + +#include + +#include "nvblox/utils/timing.h" + +#define STB_IMAGE_IMPLEMENTATION +#include "nvblox/datasets/external/stb_image.h" + +namespace nvblox { +namespace datasets { + +bool load16BitDepthImage(const std::string& filename, + DepthImage* depth_frame_ptr, MemoryType memory_type) { + CHECK_NOTNULL(depth_frame_ptr); + timing::Timer stbi_timer("file_loading/depth_image/stbi"); + int width, height, num_channels; + uint16_t* image_data = + stbi_load_16(filename.c_str(), &width, &height, &num_channels, 0); + stbi_timer.Stop(); + + if (image_data == nullptr) { + return false; + } + CHECK_EQ(num_channels, 1); + + // The image is expressed in mm. We need to divide by 1000 and convert to + // float. + // TODO(alexmillane): It's likely better to do this on the GPU. + // Follow up: I measured and this scaling + cast takes + // ~1ms. So only do this when 1ms is relevant. 
+  std::vector<float> float_image_data(height * width);
+  for (size_t lin_idx = 0; lin_idx < float_image_data.size(); lin_idx++) {
+    float_image_data[lin_idx] =
+        static_cast<float>(image_data[lin_idx]) / 1000.0f;
+  }
+
+  *depth_frame_ptr = DepthImage::fromBuffer(
+      height, width, float_image_data.data(), memory_type);
+
+  stbi_image_free(image_data);
+  return true;
+}
+
+bool load8BitColorImage(const std::string& filename,
+                        ColorImage* color_image_ptr, MemoryType memory_type) {
+  CHECK_NOTNULL(color_image_ptr);
+  timing::Timer stbi_timer("file_loading/color_image/stbi");
+  int width, height, num_channels;
+  uint8_t* image_data =
+      stbi_load(filename.c_str(), &width, &height, &num_channels, 0);
+  stbi_timer.Stop();
+
+  if (image_data == nullptr) {
+    return false;
+  }
+  CHECK_EQ(num_channels, 3);
+
+  CHECK_EQ(sizeof(Color), 3 * sizeof(uint8_t))
+      << "Color is padded so image loading won't work.";
+
+  *color_image_ptr = ColorImage::fromBuffer(
+      height, width, reinterpret_cast<const Color*>(image_data), memory_type);
+
+  stbi_image_free(image_data);
+  return true;
+}
+
+template <>
+bool ImageLoader<DepthImage>::getImage(int image_idx, DepthImage* image_ptr) {
+  CHECK_NOTNULL(image_ptr);
+  bool res = load16BitDepthImage(index_to_filepath_(image_idx), image_ptr,
+                                 memory_type_);
+  return res;
+}
+
+template <>
+bool ImageLoader<ColorImage>::getImage(int image_idx, ColorImage* image_ptr) {
+  CHECK_NOTNULL(image_ptr);
+  bool res = load8BitColorImage(index_to_filepath_(image_idx), image_ptr,
+                                memory_type_);
+  return res;
+}
+
+}  // namespace datasets
+}  // namespace nvblox
diff --git a/nvblox/src/datasets/parse_3dmatch.cpp b/nvblox/src/datasets/parse_3dmatch.cpp
new file mode 100644
index 00000000..cb720493
--- /dev/null
+++ b/nvblox/src/datasets/parse_3dmatch.cpp
@@ -0,0 +1,138 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "nvblox/datasets/parse_3dmatch.h"
+
+#include <glog/logging.h>
+
+#include <fstream>
+#include <functional>
+#include <iomanip>
+#include <sstream>
+
+namespace nvblox {
+namespace datasets {
+namespace threedmatch {
+
+// Parses a 4x4 matrix from a text file in 3D Match format: space-separated.
+bool parsePoseFromFile(const std::string& filename, Transform* transform) {
+  CHECK_NOTNULL(transform);
+  constexpr int kDimension = 4;
+
+  std::ifstream fin(filename);
+  if (fin.is_open()) {
+    for (int row = 0; row < kDimension; row++)
+      for (int col = 0; col < kDimension; col++) {
+        float item = 0.0;
+        fin >> item;
+        (*transform)(row, col) = item;
+      }
+    fin.close();
+    return true;
+  }
+  return false;
+}
+
+// Parse 3x3 camera intrinsics matrix from 3D Match format: space-separated.
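+// The file holds a standard pinhole matrix K, read row by row:
+// fx 0 cx / 0 fy cy / 0 0 1.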
+bool parseCameraFromFile(const std::string& filename,
+                         Eigen::Matrix3f* intrinsics) {
+  CHECK_NOTNULL(intrinsics);
+  constexpr int kDimension = 3;
+
+  std::ifstream fin(filename);
+  if (fin.is_open()) {
+    for (int row = 0; row < kDimension; row++)
+      for (int col = 0; col < kDimension; col++) {
+        float item = 0.0;
+        fin >> item;
+        (*intrinsics)(row, col) = item;
+      }
+    fin.close();
+
+    return true;
+  }
+  return false;
+}
+
+std::string getPathForCameraIntrinsics(const std::string& base_path) {
+  return base_path + "/camera-intrinsics.txt";
+}
+
+std::string getPathForFramePose(const std::string& base_path, const int seq_id,
+                                const int frame_id) {
+  std::stringstream ss;
+  ss << base_path << "/seq-" << std::setfill('0') << std::setw(2) << seq_id
+     << "/frame-" << std::setw(6) << frame_id << ".pose.txt";
+
+  return ss.str();
+}
+
+std::string getPathForDepthImage(const std::string& base_path,
+                                 const int seq_id, const int frame_id) {
+  std::stringstream ss;
+  ss << base_path << "/seq-" << std::setfill('0') << std::setw(2) << seq_id
+     << "/frame-" << std::setw(6) << frame_id << ".depth.png";
+
+  return ss.str();
+}
+
+std::string getPathForColorImage(const std::string& base_path,
+                                 const int seq_id, const int frame_id) {
+  std::stringstream ss;
+  ss << base_path << "/seq-" << std::setfill('0') << std::setw(2) << seq_id
+     << "/frame-" << std::setw(6) << frame_id << ".color.png";
+
+  return ss.str();
+}
+
+std::unique_ptr<ImageLoader<DepthImage>>
+createDepthImageLoader(const std::string& base_path, const int seq_id,
+                       MemoryType memory_type) {
+  return std::make_unique<ImageLoader<DepthImage>>(
+      std::bind(getPathForDepthImage, base_path, seq_id,
+                std::placeholders::_1),
+      memory_type);
+}
+
+std::unique_ptr<ImageLoader<ColorImage>>
+createColorImageLoader(const std::string& base_path, const int seq_id,
+                       MemoryType memory_type) {
+  return std::make_unique<ImageLoader<ColorImage>>(
+      std::bind(getPathForColorImage, base_path, seq_id,
+                std::placeholders::_1),
+      memory_type);
+}
+
+std::unique_ptr<ImageLoader<DepthImage>>
+createMultithreadedDepthImageLoader(const std::string& base_path,
+                                    const int seq_id, const int num_threads,
+                                    MemoryType memory_type) {
+  return std::make_unique<
+      nvblox::datasets::MultiThreadedImageLoader<DepthImage>>(
+      std::bind(getPathForDepthImage, base_path, seq_id,
+                std::placeholders::_1),
+      num_threads, memory_type);
+}
+
+std::unique_ptr<ImageLoader<ColorImage>>
+createMultithreadedColorImageLoader(const std::string& base_path,
+                                    const int seq_id, const int num_threads,
+                                    MemoryType memory_type) {
+  return std::make_unique<
+      nvblox::datasets::MultiThreadedImageLoader<ColorImage>>(
+      std::bind(getPathForColorImage, base_path, seq_id,
+                std::placeholders::_1),
+      num_threads, memory_type);
+}
+
+}  // namespace threedmatch
+}  // namespace datasets
+}  // namespace nvblox
diff --git a/nvblox/src/gpu_hash/cuda/gpu_layer_view.cu b/nvblox/src/gpu_hash/cuda/gpu_layer_view.cu
new file mode 100644
index 00000000..6bb0bdaf
--- /dev/null
+++ b/nvblox/src/gpu_hash/cuda/gpu_layer_view.cu
@@ -0,0 +1,33 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ +#include "nvblox/gpu_hash/gpu_layer_view.h" + +#include +#include +#include + +#include "nvblox/core/common_names.h" +#include "nvblox/gpu_hash/cuda/impl/gpu_layer_view_impl.cuh" + +namespace nvblox { + +// Compile Specializations for the standard block types. +template class GPULayerView; +template class GPULayerView; +template class GPULayerView; +template class GPULayerView; + +} // namespace nvblox diff --git a/nvblox/src/gpu_hash/cuda/gpu_set.cu b/nvblox/src/gpu_hash/cuda/gpu_set.cu new file mode 100644 index 00000000..3d1e00ea --- /dev/null +++ b/nvblox/src/gpu_hash/cuda/gpu_set.cu @@ -0,0 +1,33 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/gpu_hash/cuda/gpu_set.cuh" + +namespace nvblox { + +Index3DDeviceSet::Index3DDeviceSet(size_t size) { + set = Index3DDeviceSet_t::createDeviceObject(static_cast(size)); +} +Index3DDeviceSet::~Index3DDeviceSet() { + Index3DDeviceSet_t::destroyDeviceObject(set); +} + +void copySetToVector(const Index3DDeviceSet_t& set, std::vector* vec) { + vec->resize(set.size()); + auto set_iter = set.device_range(); + thrust::copy_n(set_iter.begin(), set_iter.size(), vec->begin()); +} + +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/src/integrators/cuda/esdf_integrator.cu b/nvblox/src/integrators/cuda/esdf_integrator.cu new file mode 100644 index 00000000..7c7eeb43 --- /dev/null +++ b/nvblox/src/integrators/cuda/esdf_integrator.cu @@ -0,0 +1,1214 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/gpu_hash/cuda/gpu_hash_interface.cuh" +#include "nvblox/gpu_hash/cuda/gpu_indexing.cuh" +#include "nvblox/utils/timing.h" + +#include "nvblox/integrators/esdf_integrator.h" + +namespace nvblox { + +EsdfIntegrator::~EsdfIntegrator() { + if (cuda_stream_ != nullptr) { + cudaStreamDestroy(cuda_stream_); + } +} + +void EsdfIntegrator::integrateBlocksOnGPU( + const TsdfLayer& tsdf_layer, const std::vector& block_indices, + EsdfLayer* esdf_layer) { + timing::Timer esdf_timer("esdf/integrate"); + + if (block_indices.empty()) { + return; + } + + // First, check if the stream exists. If not, create one. + if (cuda_stream_ == nullptr) { + checkCudaErrors(cudaStreamCreate(&cuda_stream_)); + } + + timing::Timer allocate_timer("esdf/integrate/allocate"); + // First, allocate all the destination blocks. + allocateBlocksOnCPU(block_indices, esdf_layer); + allocate_timer.Stop(); + + timing::Timer mark_timer("esdf/integrate/mark_sites"); + // Then, mark all the sites on GPU. 
+  // This finds all the blocks that are eligible to be parents.
+  std::vector<Index3D> blocks_with_sites;
+  std::vector<Index3D> blocks_to_clear;
+  markAllSitesOnGPU(tsdf_layer, block_indices, esdf_layer, &blocks_with_sites,
+                    &blocks_to_clear);
+  mark_timer.Stop();
+
+  std::vector<Index3D> cleared_blocks;
+  if (!blocks_to_clear.empty()) {
+    timing::Timer compute_timer("esdf/integrate/clear");
+    clearInvalidOnGPU(blocks_to_clear, esdf_layer, &cleared_blocks);
+    std::vector<Index3D> all_clear_updated;
+  }
+
+  timing::Timer compute_timer("esdf/integrate/compute");
+  // Parallel block banding on GPU.
+  computeEsdfOnGPU(blocks_with_sites, esdf_layer);
+  if (!cleared_blocks.empty()) {
+    computeEsdfOnGPU(cleared_blocks, esdf_layer);
+  }
+  compute_timer.Stop();
+}
+
+void EsdfIntegrator::integrateSliceOnGPU(
+    const TsdfLayer& tsdf_layer, const std::vector<Index3D>& block_indices,
+    float z_min, float z_max, float z_output, EsdfLayer* esdf_layer) {
+  timing::Timer esdf_timer("esdf/integrate_slice");
+
+  if (block_indices.empty()) {
+    return;
+  }
+
+  // First, check if the stream exists. If not, create one.
+  if (cuda_stream_ == nullptr) {
+    checkCudaErrors(cudaStreamCreate(&cuda_stream_));
+  }
+
+  timing::Timer allocate_timer("esdf/integrate_slice/allocate");
+  // First, allocate all the destination blocks.
+  allocateBlocksOnCPU(block_indices, esdf_layer);
+  allocate_timer.Stop();
+
+  timing::Timer mark_timer("esdf/integrate_slice/mark_sites");
+  // Then, mark all the sites on GPU.
+  // This finds all the blocks that are eligible to be parents.
+  std::vector<Index3D> blocks_with_sites;
+  std::vector<Index3D> blocks_to_clear;
+  markSitesInSliceOnGPU(tsdf_layer, block_indices, z_min, z_max, z_output,
+                        esdf_layer, &blocks_with_sites, &blocks_to_clear);
+  mark_timer.Stop();
+
+  std::vector<Index3D> cleared_blocks;
+  if (!blocks_to_clear.empty()) {
+    timing::Timer compute_timer("esdf/integrate_slice/clear");
+    clearInvalidOnGPU(blocks_to_clear, esdf_layer, &cleared_blocks);
+    std::vector<Index3D> all_clear_updated;
+  }
+
+  timing::Timer compute_timer("esdf/integrate_slice/compute");
+  // Parallel block banding on GPU.
+  computeEsdfOnGPU(blocks_with_sites, esdf_layer);
+
+  if (!cleared_blocks.empty()) {
+    computeEsdfOnGPU(cleared_blocks, esdf_layer);
+  }
+  compute_timer.Stop();
+}
+
+__device__ void clearVoxelDevice(EsdfVoxel* voxel,
+                                 float max_squared_distance_vox) {
+  voxel->parent_direction.setZero();
+  voxel->squared_distance_vox = max_squared_distance_vox;
+}
+
+// Takes in a vector of blocks and marks all site voxels, flagging blocks that
+// gained sites (updated) and blocks containing invalidated voxels (to_clear).
+// Thread block size MUST be voxels_per_side x voxels_per_side x
+// voxels_per_side. Grid size can be anything.
+__global__ void markAllSitesKernel(int num_blocks,
+                                   const TsdfBlock** tsdf_blocks,
+                                   EsdfBlock** esdf_blocks,
+                                   float min_site_distance_m, float min_weight,
+                                   float max_squared_distance_vox,
+                                   bool* updated, bool* to_clear) {
+  dim3 voxel_index = threadIdx;
+  // This for loop allows us to have fewer threadblocks than there are
+  // blocks in this computation. We assume the threadblock size is constant
+  // though to make our lives easier.
+  for (int block_index = blockIdx.x; block_index < num_blocks;
+       block_index += gridDim.x) {
+    // Get the correct voxel for this index.
+    const TsdfVoxel* tsdf_voxel =
+        &tsdf_blocks[block_index]
+             ->voxels[voxel_index.x][voxel_index.y][voxel_index.z];
+    EsdfVoxel* esdf_voxel =
+        &esdf_blocks[block_index]
+             ->voxels[voxel_index.x][voxel_index.y][voxel_index.z];
+    if (tsdf_voxel->weight >= min_weight) {
+      // Mark as inside if the voxel distance is negative.
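+      // A sign flip (inside to outside) invalidates any distance previously
+      // propagated through this voxel, so the block is queued for clearing.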
+      bool is_inside = tsdf_voxel->distance <= 0.0f;
+      if (esdf_voxel->is_inside && !is_inside) {
+        clearVoxelDevice(esdf_voxel, max_squared_distance_vox);
+        to_clear[block_index] = true;
+      }
+      esdf_voxel->is_inside = is_inside;
+      if (is_inside && fabsf(tsdf_voxel->distance) <= min_site_distance_m) {
+        esdf_voxel->is_site = true;
+        esdf_voxel->squared_distance_vox = 0.0f;
+        esdf_voxel->parent_direction.setZero();
+        updated[block_index] = true;
+      } else {
+        if (esdf_voxel->is_site) {
+          esdf_voxel->is_site = false;
+          // This voxel needs to be cleared.
+          clearVoxelDevice(esdf_voxel, max_squared_distance_vox);
+          to_clear[block_index] = true;
+        } else if (!esdf_voxel->observed) {
+          // This is a brand new voxel.
+          clearVoxelDevice(esdf_voxel, max_squared_distance_vox);
+        } else if (esdf_voxel->squared_distance_vox <= 1e-4) {
+          // This is an invalid voxel that should be cleared.
+          clearVoxelDevice(esdf_voxel, max_squared_distance_vox);
+          to_clear[block_index] = true;
+        }
+      }
+      esdf_voxel->observed = true;
+    }
+  }
+}
+
+// From:
+// https://stackoverflow.com/questions/17399119/how-do-i-use-atomicmax-on-floating-point-values-in-cuda
+__device__ __forceinline__ float atomicMinFloat(float* addr, float value) {
+  float old;
+  old = (value >= 0)
+            ? __int_as_float(atomicMin((int*)addr, __float_as_int(value)))
+            : __uint_as_float(
+                  atomicMax((unsigned int*)addr, __float_as_uint(value)));
+
+  return old;
+}
+
+/// Thread size MUST be 8x8x8, block size can be anything.
+__global__ void markSitesInSliceKernel(
+    int num_input_blocks, int num_output_blocks, const TsdfBlock** tsdf_blocks,
+    EsdfBlock** esdf_blocks, int output_voxel_index, int input_min_voxel_index,
+    int input_max_voxel_index, float min_site_distance_m, float min_weight,
+    float max_squared_distance_vox, bool* updated, bool* cleared) {
+  dim3 voxel_index = threadIdx;
+  voxel_index.z = output_voxel_index;
+  int layer_index = threadIdx.z;
+  int num_layers = blockDim.z;
+
+  constexpr int kVoxelsPerSide = VoxelBlock<TsdfVoxel>::kVoxelsPerSide;
+
+  __shared__ EsdfVoxel new_values[kVoxelsPerSide][kVoxelsPerSide];
+  __shared__ bool observed[kVoxelsPerSide][kVoxelsPerSide];
+  __shared__ float min_distance[kVoxelsPerSide][kVoxelsPerSide];
+
+  // Initialize these.
+  if (layer_index == 0) {
+    observed[voxel_index.x][voxel_index.y] = false;
+    min_distance[voxel_index.x][voxel_index.y] = 100.0f;
+  }
+  __syncthreads();
+
+  // This for loop allows us to have fewer threadblocks than there are
+  // blocks in this computation. We assume the threadblock size is constant
+  // though to make our lives easier.
+  for (int block_index = blockIdx.x; block_index < num_output_blocks;
+       block_index += gridDim.x) {
+    // Get the correct block for this.
+    const TsdfBlock* tsdf_block =
+        tsdf_blocks[block_index + num_output_blocks * layer_index];
+    // There's also null pointers in there.
+    if (tsdf_block != nullptr) {
+      // Iterate over all of the voxels in this block.
+      int start_index = 0;
+      int end_index = kVoxelsPerSide;
+      if (layer_index == 0) {
+        start_index = input_min_voxel_index;
+      }
+      if (layer_index == num_layers - 1) {
+        end_index = input_max_voxel_index;
+      }
+      for (int i = start_index; i < end_index; i++) {
+        const TsdfVoxel* tsdf_voxel =
+            &tsdf_block->voxels[voxel_index.x][voxel_index.y][i];
+        // EsdfVoxel* new_voxel = &new_values[voxel_index.x][voxel_index.y];
+        // Get the correct voxel for this index.
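+        // Fold this layer's TSDF distance into the shared-memory minimum for
+        // the (x, y) column; atomicMinFloat resolves races between the
+        // vertical layers.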
+        if (tsdf_voxel->weight >= min_weight) {
+          observed[voxel_index.x][voxel_index.y] = true;
+          atomicMinFloat(&min_distance[voxel_index.x][voxel_index.y],
+                         tsdf_voxel->distance);
+        }
+      }
+    }
+
+    // sync threads across everyone trying to update this voxel
+    __syncthreads();
+
+    // Ok now only if we're layer 0 do we compare the new and old values and
+    // decide what to output.
+    if (layer_index == 0) {
+      EsdfVoxel* esdf_voxel =
+          &esdf_blocks[block_index]
+               ->voxels[voxel_index.x][voxel_index.y][voxel_index.z];
+
+      // Case 0: Just skip it if it's unobserved. We don't care.
+      if (!observed[voxel_index.x][voxel_index.y]) {
+        continue;
+      }
+      // Determine if the new value puts us inside or in a site.
+      bool is_inside = min_distance[voxel_index.x][voxel_index.y] <= 0.0f;
+      bool is_site = fabsf(min_distance[voxel_index.x][voxel_index.y]) <=
+                         min_site_distance_m &&
+                     is_inside;
+
+      // First handle the case where the voxel is a site.
+      if (is_site) {
+        if (esdf_voxel->is_site) {
+          // Ok whatever. Add to the site list.
+          // Its existing values are fine.
+          updated[block_index] = true;
+        } else {
+          // Wasn't a site before, is now.
+          esdf_voxel->observed = true;
+          esdf_voxel->is_site = true;
+          clearVoxelDevice(esdf_voxel, 0.0f);
+          updated[block_index] = true;
+        }
+      } else {
+        // Here we have to double-check what's going on.
+        // If it was a site before, and isn't anymore, we have to clear it.
+        if (esdf_voxel->is_site) {
+          esdf_voxel->is_site = false;
+          clearVoxelDevice(esdf_voxel, max_squared_distance_vox);
+          cleared[block_index] = true;
+        }
+        // Otherwise just leave it alone unless it's brand new.
+        if (!esdf_voxel->observed) {
+          esdf_voxel->observed = true;
+          clearVoxelDevice(esdf_voxel, max_squared_distance_vox);
+        } else if (esdf_voxel->is_inside != is_inside) {
+          // In case the sidedness swapped, clear the voxel.
+          clearVoxelDevice(esdf_voxel, max_squared_distance_vox);
+          cleared[block_index] = true;
+        } else if (esdf_voxel->squared_distance_vox <= 0.0f) {
+          // This is somehow invalidly marked as a site despite the fact
+          // it shouldn't be.
+          clearVoxelDevice(esdf_voxel, max_squared_distance_vox);
+          cleared[block_index] = true;
+        }
+      }
+      // Make the sidedness match.
+      esdf_voxel->is_inside = is_inside;
+    }
+  }
+}
+
+__device__ void sweepSingleBand(Index3D voxel_index, int sweep_axis,
+                                float max_squared_distance_vox,
+                                EsdfBlock* esdf_block) {
+  constexpr int kVoxelsPerSide = VoxelBlock<EsdfVoxel>::kVoxelsPerSide;
+  Index3D last_site;
+  bool site_found;
+  // Sweep sweep sweep.
+  // First we sweep forward, then backwards.
+  for (int i = 0; i < 2; i++) {
+    last_site = Index3D::Zero();
+    site_found = false;
+    int direction = 1;
+    int start_voxel = 0;
+    int end_voxel = kVoxelsPerSide;
+    if (i == 1) {
+      direction = -1;
+      start_voxel = kVoxelsPerSide - 1;
+      end_voxel = -1;
+    }
+
+    for (voxel_index(sweep_axis) = start_voxel;
+         voxel_index(sweep_axis) != end_voxel;
+         voxel_index(sweep_axis) += direction) {
+      EsdfVoxel* esdf_voxel =
+          &esdf_block
+               ->voxels[voxel_index.x()][voxel_index.y()][voxel_index.z()];
+      if (!esdf_voxel->observed) {
+        continue;
+      }
+      // If this voxel is itself a site, then mark this for future voxels.
+      if (esdf_voxel->is_site) {
+        last_site = voxel_index;
+        site_found = true;
+      } else if (!site_found) {
+        // If this voxel isn't a site but we haven't found a site yet,
+        // then if this voxel is valid we set it as the site.
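+        // ("Valid" means its distance is below the maximum, i.e. it already
+        // inherited a parent in an earlier pass.)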
+        if (esdf_voxel->squared_distance_vox < max_squared_distance_vox) {
+          site_found = true;
+          last_site = esdf_voxel->parent_direction + voxel_index;
+        }
+      } else {
+        // If we've found the site, then we should just decide what to do
+        // here.
+        Index3D potential_direction = last_site - voxel_index;
+        float potential_distance = potential_direction.squaredNorm();
+        // Either it hasn't been set at all or it's closer to the site
+        // than to its current value.
+        if (esdf_voxel->squared_distance_vox > potential_distance) {
+          esdf_voxel->parent_direction = potential_direction;
+          esdf_voxel->squared_distance_vox = potential_distance;
+        } else if (esdf_voxel->squared_distance_vox <
+                   max_squared_distance_vox) {
+          // If the current value is a better site, then set it as a site.
+          last_site = esdf_voxel->parent_direction + voxel_index;
+        }
+      }
+    }
+  }
+}
+__device__ bool updateSingleNeighbor(const EsdfBlock* esdf_block,
+                                     const Index3D& voxel_index,
+                                     const Index3D& neighbor_voxel_index,
+                                     int axis, int direction,
+                                     float max_squared_distance_vox,
+                                     EsdfBlock* neighbor_block) {
+  const EsdfVoxel* esdf_voxel =
+      &esdf_block->voxels[voxel_index.x()][voxel_index.y()][voxel_index.z()];
+  EsdfVoxel* neighbor_voxel =
+      &neighbor_block
+           ->voxels[neighbor_voxel_index.x()][neighbor_voxel_index.y()]
+                   [neighbor_voxel_index.z()];
+  if (!esdf_voxel->observed || !neighbor_voxel->observed ||
+      neighbor_voxel->is_site ||
+      esdf_voxel->squared_distance_vox >= max_squared_distance_vox) {
+    return false;
+  }
+  // Determine if we can update this.
+  Eigen::Vector3i potential_direction = esdf_voxel->parent_direction;
+  potential_direction(axis) -= direction;
+  float potential_distance = potential_direction.squaredNorm();
+  // TODO: there might be some concurrency issues here; we have to be a bit
+  // careful on the corners/edges.
+  if (neighbor_voxel->squared_distance_vox > potential_distance) {
+    neighbor_voxel->parent_direction = potential_direction;
+    neighbor_voxel->squared_distance_vox = potential_distance;
+    return true;
+  }
+  return false;
+}
+
+__device__ bool clearSingleNeighbor(const EsdfBlock* esdf_block,
+                                    const Index3D& voxel_index,
+                                    const Index3D& neighbor_voxel_index,
+                                    int axis, int direction,
+                                    float max_squared_distance_vox,
+                                    EsdfBlock* neighbor_block) {
+  const EsdfVoxel* esdf_voxel =
+      &esdf_block->voxels[voxel_index.x()][voxel_index.y()][voxel_index.z()];
+  EsdfVoxel* neighbor_voxel =
+      &neighbor_block
+           ->voxels[neighbor_voxel_index.x()][neighbor_voxel_index.y()]
+                   [neighbor_voxel_index.z()];
+
+  if (esdf_voxel->squared_distance_vox < max_squared_distance_vox ||
+      !esdf_voxel->observed || neighbor_voxel->is_site ||
+      neighbor_voxel->squared_distance_vox >= max_squared_distance_vox) {
+    return false;
+  }
+  // Determine if we can update this.
+  Index3D parent_voxel_dir = neighbor_voxel->parent_direction;
+  if ((direction > 0 && parent_voxel_dir(axis) > 0) ||
+      (direction < 0 && parent_voxel_dir(axis) < 0)) {
+    return false;
+  }
+
+  clearVoxelDevice(neighbor_voxel, max_squared_distance_vox);
+  return true;
+}
+
+/// Thread size MUST be 8x8xN (where N is a number of blocks up to 8), block
+/// size can be anything.
+__global__ void sweepBlockBandKernel(int num_blocks, EsdfBlock** esdf_blocks,
+                                     float max_squared_distance_vox) {
+  // We go one axis at a time, syncing threads in between.
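+  // Each 8x8 sheet of threads performs the in-block lower/upper sweep of the
+  // banding algorithm along X, then Y, then Z.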
+  dim3 thread_index = threadIdx;
+  thread_index.z = 0;
+
+  for (int block_index = blockIdx.x * blockDim.z + threadIdx.z;
+       block_index < num_blocks; block_index += gridDim.x * blockDim.z) {
+    // For simplicity we have to have the same number of blocks in the CUDA
+    // kernel call as we have actual blocks.
+    EsdfBlock* esdf_block = esdf_blocks[block_index];
+    Index3D voxel_index(0, thread_index.x, thread_index.y);
+
+    // X axis done.
+    sweepSingleBand(voxel_index, 0, max_squared_distance_vox, esdf_block);
+    __syncthreads();
+
+    // Y axis done.
+    voxel_index << thread_index.x, 0, thread_index.y;
+    sweepSingleBand(voxel_index, 1, max_squared_distance_vox, esdf_block);
+    __syncthreads();
+
+    // Z axis done.
+    voxel_index << thread_index.x, thread_index.y, 0;
+    sweepSingleBand(voxel_index, 2, max_squared_distance_vox, esdf_block);
+    __syncthreads();
+  }
+}
+
+/// Thread size MUST be 8x8xN, where N is the number of blocks processed at
+/// a time, block size can be anything.
+__global__ void updateLocalNeighborBandsKernel(int num_blocks, int i,
+                                               EsdfBlock** esdf_blocks,
+                                               int* neighbor_table,
+                                               EsdfBlock** neighbor_pointers,
+                                               float max_squared_distance_vox,
+                                               bool* updated_neighbors) {
+  // We go one axis at a time, syncing threads in between.
+  dim3 thread_index = threadIdx;
+  thread_index.z = 0;
+
+  constexpr int kNumNeighbors = 6;
+  constexpr int kVoxelsPerSide = VoxelBlock<EsdfVoxel>::kVoxelsPerSide;
+
+  for (int block_index = blockIdx.x * blockDim.z + threadIdx.z;
+       block_index < num_blocks; block_index += gridDim.x * blockDim.z) {
+    EsdfBlock* esdf_block = esdf_blocks[block_index];
+    Index3D voxel_index;
+    Index3D neighbor_voxel_index;
+    // Each thread updates 1 neighbor, selected by "i".
+    // Get the neighbor block.
+    int neighbor_index = neighbor_table[block_index * kNumNeighbors + i];
+    if (neighbor_index < 0) {
+      continue;
+    }
+    EsdfBlock* neighbor_block = neighbor_pointers[neighbor_index];
+    // Now we have the neighbor block... Let's figure out which voxels we
+    // should look at.
+    int axis = i / 2;
+    int direction = i % 2 ? -1 : 1;
+
+    // Fill in the axes.
+    if (axis == 0) {
+      voxel_index << 0, thread_index.x, thread_index.y;
+    } else if (axis == 1) {
+      voxel_index << thread_index.x, 0, thread_index.y;
+    } else if (axis == 2) {
+      voxel_index << thread_index.x, thread_index.y, 0;
+    }
+    neighbor_voxel_index = voxel_index;
+    // If we're looking backwards...
+    if (direction < 0) {
+      voxel_index(axis) = 0;
+      neighbor_voxel_index(axis) = kVoxelsPerSide - 1;
+    } else {
+      voxel_index(axis) = kVoxelsPerSide - 1;
+      neighbor_voxel_index(axis) = 0;
+    }
+
+    bool updated = updateSingleNeighbor(
+        esdf_block, voxel_index, neighbor_voxel_index, axis, direction,
+        max_squared_distance_vox, neighbor_block);
+    if (updated) {
+      updated_neighbors[neighbor_index] = true;
+    }
+  }
+}
+
+/// Thread size MUST be 8x8x8, block size can be anything.
+__global__ void clearWithinBlockKernel(int num_blocks, EsdfBlock** esdf_blocks,
+                                       float max_squared_distance_vox) {
+  dim3 voxel_index = threadIdx;
+  constexpr int kVoxelsPerSide = VoxelBlock<EsdfVoxel>::kVoxelsPerSide;
+
+  // Allow block size to be whatever.
+  for (int block_index = blockIdx.x; block_index < num_blocks;
+       block_index += gridDim.x) {
+    // Get the voxel.
+    EsdfVoxel* esdf_voxel =
+        &esdf_blocks[block_index]
+             ->voxels[voxel_index.x][voxel_index.y][voxel_index.z];
+    // Check if its parent is in the same block.
+    if (!esdf_voxel->observed || esdf_voxel->is_site ||
+        esdf_voxel->squared_distance_vox >= max_squared_distance_vox) {
+      continue;
+    }
+    // Get the parent.
+    Index3D parent_index =
+        Index3D(voxel_index.x, voxel_index.y, voxel_index.z) +
+        esdf_voxel->parent_direction;
+
+    // Check if the voxel is within the same block.
+    if (parent_index.x() < 0 || parent_index.x() >= kVoxelsPerSide ||
+        parent_index.y() < 0 || parent_index.y() >= kVoxelsPerSide ||
+        parent_index.z() < 0 || parent_index.z() >= kVoxelsPerSide) {
+      continue;
+    }
+
+    // Ok check if the parent index is a site.
+    if (!esdf_blocks[block_index]
+             ->voxels[parent_index.x()][parent_index.y()][parent_index.z()]
+             .is_site) {
+      clearVoxelDevice(esdf_voxel, max_squared_distance_vox);
+    }
+  }
+}
+
+/// Thread size MUST be 8x8x8, block size can be anything.
+__global__ void clearInternalVoxelsKernel(int num_blocks,
+                                          EsdfBlock** esdf_blocks,
+                                          float max_squared_distance_vox) {
+  dim3 voxel_index = threadIdx;
+  constexpr int kVoxelsPerSide = VoxelBlock<EsdfVoxel>::kVoxelsPerSide;
+
+  // Allow block size to be whatever.
+  for (int block_index = blockIdx.x; block_index < num_blocks;
+       block_index += gridDim.x) {
+    // Get the voxel.
+    EsdfVoxel* esdf_voxel =
+        &esdf_blocks[block_index]
+             ->voxels[voxel_index.x][voxel_index.y][voxel_index.z];
+    if (!esdf_voxel->observed || esdf_voxel->is_site ||
+        esdf_voxel->squared_distance_vox >= max_squared_distance_vox) {
+      continue;
+    }
+    // Get the parent.
+    Index3D parent_index =
+        Index3D(voxel_index.x, voxel_index.y, voxel_index.z) +
+        esdf_voxel->parent_direction;
+
+    // Check if we're our own parent. This is definitely wrong since we're not
+    // a site.
+    if (esdf_voxel->parent_direction.isZero()) {
+      clearVoxelDevice(esdf_voxel, max_squared_distance_vox);
+      continue;
+    }
+
+    // Get the closest index to the parent within the same block.
+    // We just get the nearest neighbor.
+    Index3D closest_parent(min(max(parent_index.x(), 0), kVoxelsPerSide - 1),
+                           min(max(parent_index.y(), 0), kVoxelsPerSide - 1),
+                           min(max(parent_index.z(), 0), kVoxelsPerSide - 1));
+
+    // Ok check if the parent index is a site.
+    // TODO: Check if we need the observed rule or not...
+    const EsdfVoxel& neighbor_voxel =
+        esdf_blocks[block_index]
+            ->voxels[closest_parent.x()][closest_parent.y()]
+                    [closest_parent.z()];
+    if (!neighbor_voxel.observed ||
+        neighbor_voxel.squared_distance_vox >= max_squared_distance_vox) {
+      clearVoxelDevice(esdf_voxel, max_squared_distance_vox);
+    }
+  }
+}
+
+/// Thread size MUST be 8x8xN, where N is the number of blocks processed at
+/// a time, block size can be anything.
+__global__ void clearLocalNeighborBandsKernel(int num_blocks, int i,
+                                              EsdfBlock** esdf_blocks,
+                                              int* neighbor_table,
+                                              EsdfBlock** neighbor_pointers,
+                                              float max_squared_distance_vox,
+                                              bool* updated_neighbors) {
+  // We go one axis at a time, syncing threads in between.
+  dim3 thread_index = threadIdx;
+  thread_index.z = 0;
+
+  constexpr int kNumNeighbors = 6;
+  constexpr int kVoxelsPerSide = VoxelBlock<EsdfVoxel>::kVoxelsPerSide;
+
+  for (int block_index = blockIdx.x * blockDim.z + threadIdx.z;
+       block_index < num_blocks; block_index += gridDim.x * blockDim.z) {
+    EsdfBlock* esdf_block = esdf_blocks[block_index];
+    Index3D voxel_index;
+    Index3D neighbor_voxel_index;
+    // Each thread updates 1 neighbor, selected by "i".
+    // Get the neighbor block.
+    int neighbor_index = neighbor_table[block_index * kNumNeighbors + i];
+    if (neighbor_index < 0) {
+      continue;
+    }
+    EsdfBlock* neighbor_block = neighbor_pointers[neighbor_index];
+    // Now we have the neighbor block... Let's figure out which voxels we
+    // should look at.
+    int axis = i / 2;
+    int direction = i % 2 ? -1 : 1;
+
+    // Fill in the axes.
+    if (axis == 0) {
+      voxel_index << 0, thread_index.x, thread_index.y;
+    } else if (axis == 1) {
+      voxel_index << thread_index.x, 0, thread_index.y;
+    } else if (axis == 2) {
+      voxel_index << thread_index.x, thread_index.y, 0;
+    }
+    neighbor_voxel_index = voxel_index;
+    // If we're looking backwards...
+    if (direction < 0) {
+      voxel_index(axis) = 0;
+      neighbor_voxel_index(axis) = kVoxelsPerSide - 1;
+    } else {
+      voxel_index(axis) = kVoxelsPerSide - 1;
+      neighbor_voxel_index(axis) = 0;
+    }
+
+    bool updated = clearSingleNeighbor(
+        esdf_block, voxel_index, neighbor_voxel_index, axis, direction,
+        max_squared_distance_vox, neighbor_block);
+    if (updated) {
+      updated_neighbors[neighbor_index] = true;
+    }
+  }
+}
+
+void EsdfIntegrator::markAllSitesOnGPU(
+    const TsdfLayer& tsdf_layer, const std::vector<Index3D>& block_indices,
+    EsdfLayer* esdf_layer, std::vector<Index3D>* blocks_with_sites,
+    std::vector<Index3D>* cleared_blocks) {
+  CHECK_NOTNULL(esdf_layer);
+  CHECK_NOTNULL(blocks_with_sites);
+
+  // Caching.
+  const float voxel_size = tsdf_layer.voxel_size();
+  const float max_distance_vox = max_distance_m_ / voxel_size;
+  const float max_squared_distance_vox = max_distance_vox * max_distance_vox;
+  // Cache the minimum distance in metric size.
+  const float min_site_distance_m = min_site_distance_vox_ * voxel_size;
+
+  int num_blocks = block_indices.size();
+
+  // Get all of the block pointers we need.
+  tsdf_pointers_host_.resize(num_blocks);
+  block_pointers_host_.resize(num_blocks);
+
+  // Have an updated output variable as well.
+  updated_blocks_device_.resize(num_blocks);
+  updated_blocks_device_.setZero();
+  cleared_blocks_device_.resize(num_blocks);
+  cleared_blocks_device_.setZero();
+
+  // Populate all the input vectors.
+  for (int i = 0; i < num_blocks; i++) {
+    const Index3D& block_index = block_indices[i];
+    EsdfBlock::Ptr esdf_block = esdf_layer->getBlockAtIndex(block_index);
+    TsdfBlock::ConstPtr tsdf_block = tsdf_layer.getBlockAtIndex(block_index);
+
+    if (!esdf_block || !tsdf_block) {
+      LOG(ERROR) << "Somehow trying to update non-existent blocks!";
+      continue;
+    }
+
+    tsdf_pointers_host_[i] = tsdf_block.get();
+    block_pointers_host_[i] = esdf_block.get();
+  }
+
+  // Copy what we need over to the device.
+  tsdf_pointers_device_ = tsdf_pointers_host_;
+  block_pointers_device_ = block_pointers_host_;
+
+  // Call the kernel.
+  int dim_block = num_blocks;
+  constexpr int kVoxelsPerSide = EsdfBlock::kVoxelsPerSide;
+  dim3 dim_threads(kVoxelsPerSide, kVoxelsPerSide, kVoxelsPerSide);
+  markAllSitesKernel<<<dim_block, dim_threads, 0, cuda_stream_>>>(
+      num_blocks, tsdf_pointers_device_.data(), block_pointers_device_.data(),
+      min_site_distance_m, min_weight_, max_squared_distance_vox,
+      updated_blocks_device_.data(), cleared_blocks_device_.data());
+  checkCudaErrors(cudaStreamSynchronize(cuda_stream_));
+  checkCudaErrors(cudaPeekAtLastError());
+
+  // Copy out.
+  updated_blocks_host_ = updated_blocks_device_;
+  cleared_blocks_host_ = cleared_blocks_device_;
+
+  // Get the output vector.
+  // TODO(helen): swap this to a kernel operation.
+  for (int i = 0; i < num_blocks; i++) {
+    if (updated_blocks_host_[i]) {
+      blocks_with_sites->push_back(block_indices[i]);
+    }
+    if (cleared_blocks_host_[i]) {
+      cleared_blocks->push_back(block_indices[i]);
+    }
+  }
+}
+
+// 2D slice version of the markAllSites function above.
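+// Collapses the vertical stack of TSDF blocks between min_z and max_z into a
+// single output ESDF layer at output_z, taking the minimum TSDF distance over
+// each column.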
+void EsdfIntegrator::markSitesInSliceOnGPU(
+    const TsdfLayer& tsdf_layer, const std::vector<Index3D>& block_indices,
+    float min_z, float max_z, float output_z, EsdfLayer* esdf_layer,
+    std::vector<Index3D>* updated_blocks,
+    std::vector<Index3D>* cleared_blocks) {
+  CHECK_NOTNULL(esdf_layer);
+  CHECK_NOTNULL(updated_blocks);
+  CHECK_NOTNULL(cleared_blocks);
+
+  // Caching.
+  const float voxel_size = tsdf_layer.voxel_size();
+  const float max_distance_vox = max_distance_m_ / voxel_size;
+  const float max_squared_distance_vox = max_distance_vox * max_distance_vox;
+  // Cache the minimum distance in metric size.
+  const float min_site_distance_m = min_site_distance_vox_ * voxel_size;
+
+  // We are going to subsample the block_indices.
+  // We need to figure out all the output blocks, which will be a subset
+  // of the input blocks. At the same time we need to get all of the stacks
+  // of input blocks at all levels.
+  // We are going to pull some "clever" stuff: the input block list will be
+  // of length N * n_input_blocks, where "N" is the number of vertical
+  // layers there could be that fall into the min z to max z range.
+
+  // Ok first figure out how many layers we could have.
+  Index3D min_block_index;
+  Index3D min_voxel_index;
+  getBlockAndVoxelIndexFromPositionInLayer(tsdf_layer.block_size(),
+                                           Vector3f(0.0f, 0.0f, min_z),
+                                           &min_block_index, &min_voxel_index);
+  const int min_block_index_z = min_block_index.z();
+  const int min_voxel_index_z = min_voxel_index.z();
+  Index3D max_block_index;
+  Index3D max_voxel_index;
+  getBlockAndVoxelIndexFromPositionInLayer(tsdf_layer.block_size(),
+                                           Vector3f(0.0f, 0.0f, max_z),
+                                           &max_block_index, &max_voxel_index);
+  const int max_block_index_z = max_block_index.z();
+  const int max_voxel_index_z = max_voxel_index.z();
+
+  // There is always at least 1 layer.
+  int num_vertical_layers = max_block_index_z - min_block_index_z + 1;
+
+  // And figure out what the index of the output voxel is.
+  // std::pair<Index3D, Index3D> output_block_and_voxel_index
+  Index3D output_block_index;
+  Index3D output_voxel_index;
+  getBlockAndVoxelIndexFromPositionInLayer(
+      tsdf_layer.block_size(), Vector3f(0.0f, 0.0f, output_z),
+      &output_block_index, &output_voxel_index);
+  const int output_block_index_z = output_block_index.z();
+  const int output_voxel_index_z = output_voxel_index.z();
+
+  // Next get a list of all the valid input blocks.
+  Index3DSet output_block_set;
+  for (const Index3D& block_index : block_indices) {
+    if (block_index.z() >= min_block_index_z &&
+        block_index.z() <= max_block_index_z) {
+      output_block_set.insert(
+          Index3D(block_index.x(), block_index.y(), output_block_index_z));
+    }
+  }
+
+  // Ok now we have all the indices we actually need.
+  // Just have to get their pointers and we're good.
+  size_t num_blocks = output_block_set.size();
+  if (num_blocks == 0) {
+    return;
+  }
+
+  std::vector<Index3D> input_blocks(num_blocks * num_vertical_layers);
+  std::vector<Index3D> output_blocks(num_blocks);
+  tsdf_pointers_host_.resize(num_blocks * num_vertical_layers);
+  tsdf_pointers_host_.setZero();
+  block_pointers_host_.resize(num_blocks);
+
+  size_t i = 0;
+  for (const Index3D& block_index : output_block_set) {
+    // This is for the output block, which we allocate along the way.
+ output_blocks[i] = block_index; + block_pointers_host_[i] = + esdf_layer->allocateBlockAtIndex(block_index).get(); + + // Go through all the relevant input pointers: + Index3D input_block_index = block_index; + + int j = 0; + for (input_block_index.z() = min_block_index_z; + input_block_index.z() <= max_block_index_z; input_block_index.z()++) { + input_blocks[i + num_blocks * j] = input_block_index; + // This can be null. It's fine. + tsdf_pointers_host_[i + num_blocks * j] = + tsdf_layer.getBlockAtIndex(input_block_index).get(); + j++; + } + i++; + } + + // Copy what we need over to the device. + tsdf_pointers_device_ = tsdf_pointers_host_; + block_pointers_device_ = block_pointers_host_; + + // Finally, set up the updated and cleared vectors. + updated_blocks_device_.resize(num_blocks); + updated_blocks_device_.setZero(); + cleared_blocks_device_.resize(num_blocks); + cleared_blocks_device_.setZero(); + + // Call the kernel! + int dim_block = num_blocks; + constexpr int kVoxelsPerSide = EsdfBlock::kVoxelsPerSide; + dim3 dim_threads(kVoxelsPerSide, kVoxelsPerSide, num_vertical_layers); + markSitesInSliceKernel<<>>( + num_blocks, num_blocks, tsdf_pointers_device_.data(), + block_pointers_device_.data(), output_voxel_index_z, min_voxel_index_z, + max_voxel_index_z, min_site_distance_m, min_weight_, + max_squared_distance_vox, updated_blocks_device_.data(), + cleared_blocks_device_.data()); + checkCudaErrors(cudaStreamSynchronize(cuda_stream_)); + checkCudaErrors(cudaPeekAtLastError()); + + // Copy out. + updated_blocks_host_ = updated_blocks_device_; + cleared_blocks_host_ = cleared_blocks_device_; + + // Pack the outputs. The rest of the functions should work as before. + for (size_t i = 0; i < output_blocks.size(); i++) { + if (updated_blocks_host_[i]) { + updated_blocks->push_back(output_blocks[i]); + } + if (cleared_blocks_host_[i]) { + cleared_blocks->push_back(output_blocks[i]); + } + } +} + +void EsdfIntegrator::clearInvalidOnGPU( + const std::vector& blocks_to_clear, EsdfLayer* esdf_layer, + std::vector* updated_blocks) { + CHECK_NOTNULL(esdf_layer); + CHECK_NOTNULL(updated_blocks); + + // Caching. + const float voxel_size = esdf_layer->voxel_size(); + const float max_distance_vox = max_distance_m_ / voxel_size; + const float max_squared_distance_vox = max_distance_vox * max_distance_vox; + + int num_blocks = blocks_to_clear.size(); + block_pointers_host_.resize(num_blocks); + + // Have an updated output variable as well. + updated_blocks_device_.resize(num_blocks); + updated_blocks_device_.setZero(); + + // Populate all the input vectors. + for (size_t i = 0; i < num_blocks; i++) { + const Index3D& block_index = blocks_to_clear[i]; + block_pointers_host_[i] = esdf_layer->getBlockAtIndex(block_index).get(); + } + + block_pointers_device_ = block_pointers_host_; + + // Alright now run a kernel to clear all the voxels within a block. + constexpr int kVoxelsPerSide = EsdfBlock::kVoxelsPerSide; + dim3 dim_threads(kVoxelsPerSide, kVoxelsPerSide, kVoxelsPerSide); + clearWithinBlockKernel<<>>( + num_blocks, block_pointers_device_.data(), max_squared_distance_vox); + checkCudaErrors(cudaStreamSynchronize(cuda_stream_)); + checkCudaErrors(cudaPeekAtLastError()); + + // Then clear all the neighbors. 
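+  // Illustrative sketch (simplified shape, not the exact signatures below):
+  // this is a breadth-first frontier expansion. Each pass resets, in the
+  // neighbors of the current frontier, the voxels whose parent sites were
+  // invalidated; any neighbor that changed becomes the next frontier, and the
+  // loop runs until no block changes:
+  //
+  //   frontier = blocks_to_clear;
+  //   while (!frontier.empty()) {
+  //     frontier = clearBlockNeighbors(frontier);  // returns changed blocks
+  //   }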
+ Index3DSet all_cleared_blocks; + std::copy(blocks_to_clear.begin(), blocks_to_clear.end(), + std::inserter(all_cleared_blocks, all_cleared_blocks.end())); + + std::vector clear_list = blocks_to_clear; + std::vector new_clear_list; + VLOG(3) << "Blocks to clear: " << blocks_to_clear.size(); + while (!clear_list.empty()) { + clearBlockNeighbors(clear_list, esdf_layer, &new_clear_list); + std::copy(new_clear_list.begin(), new_clear_list.end(), + std::inserter(all_cleared_blocks, all_cleared_blocks.end())); + std::swap(clear_list, new_clear_list); + new_clear_list.clear(); + VLOG(3) << "Clear list size: " << clear_list.size(); + } + + for (const Index3D& index : all_cleared_blocks) { + updated_blocks->push_back(index); + } +} + +void EsdfIntegrator::clearBlockNeighbors(std::vector& clear_list, + EsdfLayer* esdf_layer, + std::vector* new_clear_list) { + int num_blocks = clear_list.size(); + + if (num_blocks == 0) { + return; + } + constexpr int kNumNeighbors = 6; + const float voxel_size = esdf_layer->voxel_size(); + const float max_distance_vox = max_distance_m_ / voxel_size; + const float max_squared_distance_vox = max_distance_vox * max_distance_vox; + + constexpr int kVoxelsPerSide = EsdfBlock::kVoxelsPerSide; + dim3 dim_threads_per_voxel(kVoxelsPerSide, kVoxelsPerSide, kVoxelsPerSide); + + // Step 0: block pointers. + block_pointers_host_.resize(num_blocks); + for (size_t i = 0; i < num_blocks; i++) { + const Index3D& block_index = clear_list[i]; + block_pointers_host_[i] = esdf_layer->getBlockAtIndex(block_index).get(); + } + block_pointers_device_ = block_pointers_host_; + + // Step 0a: fix up the blocks so their neighbors are valid. + clearInternalVoxelsKernel<<>>( + num_blocks, block_pointers_device_.data(), max_squared_distance_vox); + checkCudaErrors(cudaStreamSynchronize(cuda_stream_)); + + // Step one: set up the neighbor table. + std::vector neighbor_indices; + neighbor_table_host_.resize(num_blocks * kNumNeighbors); + neighbor_table_host_.setZero(); + neighbor_pointers_host_.resize(0); + + createNeighborTable(clear_list, esdf_layer, &neighbor_indices, + &neighbor_pointers_host_, &neighbor_table_host_); + + // Step two: run the neighbor updating kernel. + updated_blocks_device_.resize(neighbor_indices.size()); + updated_blocks_device_.setZero(); + + neighbor_pointers_device_ = neighbor_pointers_host_; + neighbor_table_device_ = neighbor_table_host_; + + constexpr int kNumBlocksPerCudaBlock = 8; + int dim_block = std::max( + static_cast( + std::ceil(num_blocks / static_cast(kNumBlocksPerCudaBlock))), + 1); + dim3 dim_threads(kVoxelsPerSide, kVoxelsPerSide, kNumBlocksPerCudaBlock); + // We have to do the neighbors one at a time basically for concurrency + // issues. + // No clue if the concurrency issues hold for the clearing operation. + // But this is easier to copy-and-paste. + for (int i = 0; i < kNumNeighbors; i++) { + clearLocalNeighborBandsKernel<<>>( + num_blocks, i, block_pointers_device_.data(), + neighbor_table_device_.data(), neighbor_pointers_device_.data(), + max_squared_distance_vox, updated_blocks_device_.data()); + } + checkCudaErrors(cudaStreamSynchronize(cuda_stream_)); + checkCudaErrors(cudaPeekAtLastError()); + + // Repack into output vector. 
+ updated_blocks_host_ = updated_blocks_device_; + block_pointers_host_.resize(0); + + new_clear_list->clear(); + for (size_t i = 0; i < neighbor_indices.size(); i++) { + if (updated_blocks_host_[i]) { + new_clear_list->push_back(neighbor_indices[i]); + block_pointers_host_.push_back(neighbor_pointers_host_[i]); + } + } + + // Step three: clear any remaining voxels on the interior of the blocks + int num_updated_blocks = new_clear_list->size(); + if (num_updated_blocks == 0) { + return; + } + + block_pointers_device_ = block_pointers_host_; + clearInternalVoxelsKernel<<>>(block_pointers_device_.size(), + block_pointers_device_.data(), + max_squared_distance_vox); + checkCudaErrors(cudaStreamSynchronize(cuda_stream_)); + checkCudaErrors(cudaPeekAtLastError()); +} + +void EsdfIntegrator::computeEsdfOnGPU( + const std::vector& blocks_with_sites, EsdfLayer* esdf_layer) { + CHECK_NOTNULL(esdf_layer); + // Cache everything. + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + const float voxel_size = esdf_layer->block_size() / kVoxelsPerSide; + const float max_distance_vox = max_distance_m_ / voxel_size; + const float max_squared_distance_vox = max_distance_vox * max_distance_vox; + + block_pointers_host_.resize(blocks_with_sites.size()); + for (size_t i = 0; i < blocks_with_sites.size(); i++) { + block_pointers_host_[i] = + esdf_layer->getBlockAtIndex(blocks_with_sites[i]).get(); + } + + // First we go over all of the blocks with sites. + // We compute all the proximal sites inside the block first. + block_pointers_device_ = block_pointers_host_; + sweepBlockBandOnGPU(block_pointers_device_, max_squared_distance_vox); + + // Get the neighbors of all the blocks with sites. + std::vector blocks_to_run = blocks_with_sites; + std::vector updated_blocks; + + int i = 0; + while (!blocks_to_run.empty()) { + updateLocalNeighborBandsOnGPU(blocks_to_run, block_pointers_device_, + max_squared_distance_vox, esdf_layer, + &updated_blocks, &neighbor_pointers_device_); + VLOG(3) << "Iteration: " << i + << " Number of updated blocks: " << updated_blocks.size() + << " blocks with sites: " << blocks_with_sites.size(); + i++; + sweepBlockBandOnGPU(neighbor_pointers_device_, max_squared_distance_vox); + blocks_to_run = std::move(updated_blocks); + block_pointers_device_ = neighbor_pointers_device_; + } +} + +void EsdfIntegrator::sweepBlockBandOnGPU( + device_vector& block_pointers, float max_squared_distance_vox) { + if (block_pointers.empty()) { + return; + } + timing::Timer sweep_timer("esdf/integrate/compute/sweep"); + + // Caching. + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + const int num_blocks = block_pointers.size(); + + // Call the kernel. + // We do 2-dimensional sweeps in this kernel. Each thread does 3 sweeps. + // We do 8 blocks at a time. 
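+  // Worked example (illustrative numbers): with num_blocks = 20 ESDF blocks
+  // and 8 voxel blocks per CUDA block, the launch below uses
+  // dim_block = ceil(20 / 8) = 3 CUDA blocks, each of
+  // kVoxelsPerSide x kVoxelsPerSide x 8 threads.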
+  constexpr int kNumBlocksPerCudaBlock = 8;
+  int dim_block = std::max(
+      static_cast<int>(
+          std::ceil(num_blocks / static_cast<float>(kNumBlocksPerCudaBlock))),
+      1);
+  dim3 dim_threads(kVoxelsPerSide, kVoxelsPerSide, kNumBlocksPerCudaBlock);
+  sweepBlockBandKernel<<<dim_block, dim_threads, 0, cuda_stream_>>>(
+      num_blocks, block_pointers.data(), max_squared_distance_vox);
+  checkCudaErrors(cudaStreamSynchronize(cuda_stream_));
+  checkCudaErrors(cudaPeekAtLastError());
+}
+
+void EsdfIntegrator::updateLocalNeighborBandsOnGPU(
+    const std::vector<Index3D>& block_indices,
+    device_vector<EsdfBlock*>& block_pointers, float max_squared_distance_vox,
+    EsdfLayer* esdf_layer, std::vector<Index3D>* updated_blocks,
+    device_vector<EsdfBlock*>* updated_block_pointers) {
+  if (block_indices.empty()) {
+    return;
+  }
+
+  timing::Timer neighbors_timer("esdf/integrate/compute/neighbors");
+
+  CHECK_EQ(block_indices.size(), block_pointers.size());
+
+  constexpr int kNumNeighbors = 6;
+  constexpr int kVoxelsPerSide = VoxelBlock<EsdfVoxel>::kVoxelsPerSide;
+  const int num_blocks = block_pointers.size();
+
+  timing::Timer table_timer("esdf/integrate/compute/neighbors/table");
+
+  // This one is quite a bit more complicated.
+  // For each block, we need to get its 6 neighbors.
+  std::vector<Index3D> neighbor_indices;
+  neighbor_table_host_.resize(num_blocks * kNumNeighbors);
+  neighbor_table_host_.setZero();
+  neighbor_pointers_host_.resize(0);
+
+  createNeighborTable(block_indices, esdf_layer, &neighbor_indices,
+                      &neighbor_pointers_host_, &neighbor_table_host_);
+
+  table_timer.Stop();
+
+  // Set up an updated map.
+  updated_blocks_device_.resize(neighbor_indices.size());
+  updated_blocks_device_.setZero();
+
+  neighbor_pointers_device_ = neighbor_pointers_host_;
+  neighbor_table_device_ = neighbor_table_host_;
+
+  timing::Timer kernel_timer("esdf/integrate/compute/neighbors/kernel");
+
+  // Ok now we have to give all this stuff to the kernel.
+  // TODO(helen): you get weird-ass concurrency issues if this is not 1.
+  constexpr int kNumBlocksPerCudaBlock = 8;
+  int dim_block = std::max(
+      static_cast<int>(
+          std::ceil(num_blocks / static_cast<float>(kNumBlocksPerCudaBlock))),
+      1);
+  dim3 dim_threads(kVoxelsPerSide, kVoxelsPerSide, kNumBlocksPerCudaBlock);
+  // We have to do the neighbors one at a time basically for concurrency
+  // issues.
+  for (int i = 0; i < kNumNeighbors; i++) {
+    updateLocalNeighborBandsKernel<<<dim_block, dim_threads, 0,
+                                     cuda_stream_>>>(
+        num_blocks, i, block_pointers.data(), neighbor_table_device_.data(),
+        neighbor_pointers_device_.data(), max_squared_distance_vox,
+        updated_blocks_device_.data());
+  }
+  checkCudaErrors(cudaStreamSynchronize(cuda_stream_));
+  checkCudaErrors(cudaPeekAtLastError());
+
+  kernel_timer.Stop();
+
+  // Unpack the kernel results.
+  // TODO(helen): swap this to a kernel operation.
+  updated_blocks->clear();
+  updated_blocks_host_ = updated_blocks_device_;
+  block_pointers_host_.resize(0);
+
+  for (size_t i = 0; i < neighbor_indices.size(); i++) {
+    if (updated_blocks_host_[i]) {
+      updated_blocks->push_back(neighbor_indices[i]);
+      block_pointers_host_.push_back(neighbor_pointers_host_[i]);
+    }
+  }
+  *updated_block_pointers = block_pointers_host_;
+}
+
+void EsdfIntegrator::createNeighborTable(
+    const std::vector<Index3D>& block_indices, EsdfLayer* esdf_layer,
+    std::vector<Index3D>* neighbor_indices,
+    host_vector<EsdfBlock*>* neighbor_pointers,
+    host_vector<int>* neighbor_table) {
+  // TODO(helen): make this extensible to different number of neighbors.
+  constexpr int kNumNeighbors = 6;
+  int num_blocks = block_indices.size();
+
+  // Hash map mapping the neighbor index to the pointers above.
+ Index3DHashMapType::type neighbor_map; + + // Direction Shorthand: axis = neighbor_index/2 + // direction = neighbor_index%2 ? -1 : 1 + Index3D direction = Index3D::Zero(); + for (int block_number = 0; block_number < num_blocks; block_number++) { + const Index3D& block_index = block_indices[block_number]; + for (int neighbor_number = 0; neighbor_number < kNumNeighbors; + neighbor_number++) { + direction.setZero(); + // Change just one axis of the direction. + direction(neighbor_number / 2) = neighbor_number % 2 ? -1 : 1; + // Check if this is already in our hash. + Index3D neighbor_index = block_index + direction; + auto res = neighbor_map.find(neighbor_index); + if (res != neighbor_map.end()) { + (*neighbor_table)[block_number * kNumNeighbors + neighbor_number] = + res->second; + } else { + // Doesn't exist in the neighbor list yet. + EsdfBlock::Ptr esdf_block = esdf_layer->getBlockAtIndex(neighbor_index); + if (esdf_block) { + int next_index = neighbor_indices->size(); + neighbor_indices->push_back(neighbor_index); + neighbor_pointers->push_back(esdf_block.get()); + neighbor_map[neighbor_index] = next_index; + (*neighbor_table)[block_number * kNumNeighbors + neighbor_number] = + next_index; + } else { + (*neighbor_table)[block_number * kNumNeighbors + neighbor_number] = + -1; + neighbor_map[neighbor_index] = -1; + } + } + } + } + CHECK_EQ(neighbor_table->size(), kNumNeighbors * block_indices.size()); + CHECK_EQ(neighbor_indices->size(), neighbor_pointers->size()); +} + +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/src/integrators/cuda/frustum.cu b/nvblox/src/integrators/cuda/frustum.cu new file mode 100644 index 00000000..ffc4af0e --- /dev/null +++ b/nvblox/src/integrators/cuda/frustum.cu @@ -0,0 +1,364 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/integrators/frustum.h" + +#include "nvblox/core/hash.h" +#include "nvblox/core/indexing.h" +#include "nvblox/core/types.h" +#include "nvblox/core/unified_vector.h" +#include "nvblox/integrators/ray_caster.h" +#include "nvblox/utils/timing.h" + +namespace nvblox { + +FrustumCalculator::FrustumCalculator() { cudaStreamCreate(&cuda_stream_); } +FrustumCalculator::~FrustumCalculator() { cudaStreamDestroy(cuda_stream_); } + +unsigned int FrustumCalculator::raycast_subsampling_factor() const { + return raycast_subsampling_factor_; +} + +void FrustumCalculator::raycast_subsampling_factor( + unsigned int raycast_subsampling_factor) { + CHECK_GT(raycast_subsampling_factor, 0); + raycast_subsampling_factor_ = raycast_subsampling_factor; +} + +// AABB linear indexing +// - We index in x-major, i.e. x is varied first, then y, then z. +// - Linear indexing within an AABB is relative and starts at zero. This is +// not true for AABB 3D indexing which is w.r.t. the layer origin. 
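+// Worked example (illustrative numbers): with aabb_min = (10, 10, 10) and
+// aabb_size = (4, 3, 2), the layer index (11, 12, 10) shifts to the relative
+// index (1, 2, 0) and maps to linear index 1 + 2 * 4 + 0 * 4 * 3 = 9;
+// aabbLinearIndexToLayerIndex(9, ...) inverts this back to (11, 12, 10).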
+__host__ __device__ inline size_t layerIndexToAabbLinearIndex(
+    const Index3D& index, const Index3D& aabb_min, const Index3D& aabb_size) {
+  const Index3D index_shifted = index - aabb_min;
+  return index_shifted.x() +                                 // NOLINT
+         index_shifted.y() * aabb_size.x() +                 // NOLINT
+         index_shifted.z() * aabb_size.x() * aabb_size.y();  // NOLINT
+}
+
+__host__ __device__ inline Index3D aabbLinearIndexToLayerIndex(
+    const size_t lin_idx, const Index3D& aabb_min, const Index3D& aabb_size) {
+  const Index3D index(lin_idx % aabb_size.x(),                   // NOLINT
+                      (lin_idx / aabb_size.x()) % aabb_size.y(),  // NOLINT
+                      lin_idx / (aabb_size.x() * aabb_size.y())); // NOLINT
+  return index + aabb_min;
+}
+
+__device__ void setIndexUpdated(const Index3D& index_to_update,
+                                const Index3D& aabb_min,
+                                const Index3D& aabb_size, bool* aabb_updated) {
+  const size_t linear_size = aabb_size.x() * aabb_size.y() * aabb_size.z();
+  const size_t lin_idx =
+      layerIndexToAabbLinearIndex(index_to_update, aabb_min, aabb_size);
+  if (lin_idx < linear_size) {
+    aabb_updated[lin_idx] = true;
+  }
+}
+
+template <typename T>
+void convertAabbUpdatedToVector(const Index3D& aabb_min,
+                                const Index3D& aabb_size,
+                                size_t aabb_linear_size, bool* aabb_updated,
+                                T* indices) {
+  indices->reserve(aabb_linear_size);
+  for (size_t i = 0; i < aabb_linear_size; i++) {
+    if (aabb_updated[i]) {
+      indices->push_back(aabbLinearIndexToLayerIndex(i, aabb_min, aabb_size));
+    }
+  }
+}
+
+__global__ void getBlockIndicesInImageKernel(
+    const Transform T_L_C, const Camera camera, const float* image, int rows,
+    int cols, const float block_size, const float max_integration_distance_m,
+    const float truncation_distance_m, const Index3D aabb_min,
+    const Index3D aabb_size, bool* aabb_updated) {
+  // First, figure out which pixel we're in.
+  int pixel_row = blockIdx.x * blockDim.x + threadIdx.x;
+  int pixel_col = blockIdx.y * blockDim.y + threadIdx.y;
+
+  // Hooray we do nothing.
+  if (pixel_row >= rows || pixel_col >= cols) {
+    return;
+  }
+
+  // Look up the pixel we care about.
+  float depth = image::access(pixel_row, pixel_col, cols, image);
+  if (depth <= 0.0f) {
+    return;
+  }
+  if (max_integration_distance_m > 0.0f &&
+      depth > max_integration_distance_m) {
+    depth = max_integration_distance_m;
+  }
+
+  // Ok now project this thing into space.
+  Vector3f p_C = (depth + truncation_distance_m) *
+                 camera.rayFromPixelIndices(Index2D(pixel_col, pixel_row));
+  Vector3f p_L = T_L_C * p_C;
+
+  // Now we have the position of the thing in space. Now we need the block
+  // index.
+ Index3D block_index = getBlockIndexFromPositionInLayer(block_size, p_L); + setIndexUpdated(block_index, aabb_min, aabb_size, aabb_updated); +} + +__global__ void raycastToBlocksKernel(int num_blocks, Index3D* block_indices, + const Transform T_L_C, float block_size, + const Index3D aabb_min, + const Index3D aabb_size, + bool* aabb_updated) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int corner_index = threadIdx.y; + + if (index >= num_blocks) { + return; + } + + constexpr float corner_increment_table[9][3] = { + {0.0f, 0.0f, 0.0f}, // NOLINT + {1.0f, 0.0f, 0.0f}, // NOLINT + {0.0f, 1.0f, 0.0f}, // NOLINT + {0.0f, 0.0f, 1.0f}, // NOLINT + {1.0f, 1.0f, 0.0f}, // NOLINT + {1.0f, 0.0f, 1.0f}, // NOLINT + {0.0f, 1.0f, 1.0f}, // NOLINT + {1.0f, 1.0f, 1.0f}, // NOLINT + {0.5f, 0.5f, 0.5f}, // NOLINT + }; + + const Vector3f increment(corner_increment_table[corner_index][0], + corner_increment_table[corner_index][1], + corner_increment_table[corner_index][2]); + + const Index3D& block_index = block_indices[index]; + + RayCaster raycaster(T_L_C.translation() / block_size, + block_index.cast() + increment); + Index3D ray_index; + while (raycaster.nextRayIndex(&ray_index)) { + setIndexUpdated(ray_index, aabb_min, aabb_size, aabb_updated); + } +} + +__global__ void combinedBlockIndicesInImageKernel( + const Transform T_L_C, const Camera camera, const float* image, int rows, + int cols, const float block_size, const float max_integration_distance_m, + const float truncation_distance_m, int raycast_subsampling_factor, + const Index3D aabb_min, const Index3D aabb_size, bool* aabb_updated) { + // First, figure out which pixel we're in. + const int ray_idx_row = blockIdx.x * blockDim.x + threadIdx.x; + const int ray_idx_col = blockIdx.y * blockDim.y + threadIdx.y; + int pixel_row = ray_idx_row * raycast_subsampling_factor; + int pixel_col = ray_idx_col * raycast_subsampling_factor; + + // Hooray we do nothing. + if (pixel_row >= (rows + raycast_subsampling_factor - 1) || + pixel_col >= (cols + raycast_subsampling_factor - 1)) { + return; + } else { + // Move remaining overhanging pixels back to the borders. + if (pixel_row >= rows) { + pixel_row = rows - 1; + } + if (pixel_col >= cols) { + pixel_col = cols - 1; + } + } + + // Look up the pixel we care about. + float depth = image::access(pixel_row, pixel_col, cols, image); + if (depth <= 0.0f) { + return; + } + if (max_integration_distance_m > 0.0f && depth > max_integration_distance_m) { + depth = max_integration_distance_m; + } + + // Ok now project this thing into space. + Vector3f p_C = (depth + truncation_distance_m) * + camera.rayFromPixelIndices(Index2D(pixel_col, pixel_row)); + Vector3f p_L = T_L_C * p_C; + + // Now we have the position of the thing in space. Now we need the block + // index. + Index3D block_index = getBlockIndexFromPositionInLayer(block_size, p_L); + setIndexUpdated(block_index, aabb_min, aabb_size, aabb_updated); + + // Ok raycast to the correct point in the block. 
+  RayCaster raycaster(T_L_C.translation() / block_size, p_L / block_size);
+  Index3D ray_index = Index3D::Zero();
+  while (raycaster.nextRayIndex(&ray_index)) {
+    setIndexUpdated(ray_index, aabb_min, aabb_size, aabb_updated);
+  }
+}
+
+std::vector<Index3D> FrustumCalculator::getBlocksInImageViewCuda(
+    const DepthImage& depth_frame, const Transform& T_L_C,
+    const Camera& camera, const float block_size,
+    const float truncation_distance_m,
+    const float max_integration_distance_m) {
+  timing::Timer setup_timer("in_view/setup");
+
+  // Aight so first we have to get the AABB of this guy.
+  const Frustum frustum =
+      camera.getViewFrustum(T_L_C, 0.0f, max_integration_distance_m);
+  const AxisAlignedBoundingBox aabb_L = frustum.getAABB();
+
+  // Get the min index and the max index.
+  const Index3D min_index =
+      getBlockIndexFromPositionInLayer(block_size, aabb_L.min());
+  const Index3D max_index =
+      getBlockIndexFromPositionInLayer(block_size, aabb_L.max());
+  const Index3D aabb_size = max_index - min_index + Index3D::Ones();
+  const size_t aabb_linear_size =
+      aabb_size.x() * aabb_size.y() * aabb_size.z();
+
+  // A 3D grid of bools, one for each block in the AABB, which indicates if it
+  // is in the view. The 3D grid is represented as a flat vector.
+  if (aabb_linear_size > aabb_device_buffer_.size()) {
+    constexpr float kBufferExpansionFactor = 1.5f;
+    const int new_size =
+        static_cast<int>(kBufferExpansionFactor * aabb_linear_size);
+    aabb_device_buffer_.reserve(new_size);
+    aabb_host_buffer_.reserve(new_size);
+  }
+  checkCudaErrors(cudaMemsetAsync(aabb_device_buffer_.data(), 0,
+                                  sizeof(bool) * aabb_linear_size));
+  aabb_device_buffer_.resize(aabb_linear_size);
+  aabb_host_buffer_.resize(aabb_linear_size);
+
+  setup_timer.Stop();
+
+  // Raycast
+  if (raycast_to_pixels_) {
+    getBlocksByRaycastingPixels(T_L_C, camera, depth_frame, block_size,
+                                truncation_distance_m,
+                                max_integration_distance_m, min_index,
+                                aabb_size, aabb_device_buffer_.data());
+  } else {
+    getBlocksByRaycastingCorners(T_L_C, camera, depth_frame, block_size,
+                                 truncation_distance_m,
+                                 max_integration_distance_m, min_index,
+                                 aabb_size, aabb_device_buffer_.data());
+  }
+
+  // Output vector.
+  timing::Timer output_timer("in_view/output");
+  cudaMemcpyAsync(aabb_host_buffer_.data(), aabb_device_buffer_.data(),
+                  sizeof(bool) * aabb_linear_size, cudaMemcpyDeviceToHost,
+                  cuda_stream_);
+  checkCudaErrors(cudaStreamSynchronize(cuda_stream_));
+  checkCudaErrors(cudaPeekAtLastError());
+
+  std::vector<Index3D> output_vector;
+  convertAabbUpdatedToVector<std::vector<Index3D>>(
+      min_index, aabb_size, aabb_linear_size, aabb_host_buffer_.data(),
+      &output_vector);
+  output_timer.Stop();
+
+  // We have to manually destruct this. :(
+  timing::Timer destroy_timer("in_view/destroy");
+  destroy_timer.Stop();
+  return output_vector;
+}
+
+void FrustumCalculator::getBlocksByRaycastingCorners(
+    const Transform& T_L_C, const Camera& camera,
+    const DepthImage& depth_frame, float block_size,
+    const float truncation_distance_m,
+    const float max_integration_distance_m, const Index3D& min_index,
+    const Index3D& aabb_size, bool* aabb_updated_cuda) {
+  // Get the blocks touched by the ray endpoints.
+  // We tile the image into thread blocks of 16x16 pixels (256 threads each).
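+  // Worked example (illustrative numbers): a 480x640 depth image with
+  // kThreadDim = 16 gives a grid of ceil(480 / 16) x ceil(640 / 16) =
+  // 30 x 40 CUDA blocks, one thread per pixel.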
+ constexpr int kThreadDim = 16; + int rounded_rows = static_cast( + std::ceil(depth_frame.rows() / static_cast(kThreadDim))); + int rounded_cols = static_cast( + std::ceil(depth_frame.cols() / static_cast(kThreadDim))); + dim3 block_dim(rounded_rows, rounded_cols); + dim3 thread_dim(kThreadDim, kThreadDim); + + timing::Timer image_blocks_timer("in_view/get_image_blocks"); + getBlockIndicesInImageKernel<<>>( + T_L_C, camera, depth_frame.dataConstPtr(), depth_frame.rows(), + depth_frame.cols(), block_size, max_integration_distance_m, + truncation_distance_m, min_index, aabb_size, aabb_updated_cuda); + checkCudaErrors(cudaStreamSynchronize(cuda_stream_)); + checkCudaErrors(cudaPeekAtLastError()); + + image_blocks_timer.Stop(); + + timing::Timer image_blocks_copy_timer("in_view/image_blocks_copy"); + + unified_vector initial_vector; + const size_t aabb_linear_size = aabb_size.x() * aabb_size.y() * aabb_size.z(); + initial_vector.reserve(aabb_linear_size / 3); + convertAabbUpdatedToVector>( + min_index, aabb_size, aabb_linear_size, aabb_updated_cuda, + &initial_vector); + image_blocks_copy_timer.Stop(); + + // Call the kernel to do raycasting. + timing::Timer raycast_blocks_timer("in_view/raycast_blocks"); + + int num_initial_blocks = initial_vector.size(); + constexpr int kNumCorners = 9; + constexpr int kNumBlocksPerThreadBlock = 40; + int raycast_block_dim = static_cast(std::ceil( + static_cast(num_initial_blocks) / kNumBlocksPerThreadBlock)); + dim3 raycast_thread_dim(kNumBlocksPerThreadBlock, kNumCorners); + raycastToBlocksKernel<<>>( + num_initial_blocks, initial_vector.data(), T_L_C, block_size, min_index, + aabb_size, aabb_updated_cuda); + checkCudaErrors(cudaStreamSynchronize(cuda_stream_)); + checkCudaErrors(cudaPeekAtLastError()); + raycast_blocks_timer.Stop(); +} + +void FrustumCalculator::getBlocksByRaycastingPixels( + const Transform& T_L_C, const Camera& camera, const DepthImage& depth_frame, + float block_size, const float truncation_distance_m, + const float max_integration_distance_m, const Index3D& min_index, + const Index3D& aabb_size, bool* aabb_updated_cuda) { + // Number of rays per dimension. Depth frame size / subsampling rate. + const int num_subsampled_rows = + std::ceil(static_cast(depth_frame.rows() + 1) / + static_cast(raycast_subsampling_factor_)); + const int num_subsampled_cols = + std::ceil(static_cast(depth_frame.cols() + 1) / + static_cast(raycast_subsampling_factor_)); + + // We'll do warps of 32x32 pixels in the image. This is 1024 threads which is + // in the recommended 512-1024 range. 
+ constexpr int kThreadDim = 16; + const int rounded_rows = static_cast( + std::ceil(num_subsampled_rows / static_cast(kThreadDim))); + const int rounded_cols = static_cast( + std::ceil(num_subsampled_cols / static_cast(kThreadDim))); + dim3 block_dim(rounded_rows, rounded_cols); + dim3 thread_dim(kThreadDim, kThreadDim); + + timing::Timer combined_kernel_timer("in_view/combined_kernel"); + combinedBlockIndicesInImageKernel<<>>( + T_L_C, camera, depth_frame.dataConstPtr(), depth_frame.rows(), + depth_frame.cols(), block_size, max_integration_distance_m, + truncation_distance_m, raycast_subsampling_factor_, min_index, aabb_size, + aabb_updated_cuda); + checkCudaErrors(cudaStreamSynchronize(cuda_stream_)); + checkCudaErrors(cudaPeekAtLastError()); + combined_kernel_timer.Stop(); +} + +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/src/integrators/cuda/projective_color_integrator.cu b/nvblox/src/integrators/cuda/projective_color_integrator.cu new file mode 100644 index 00000000..4ba7d815 --- /dev/null +++ b/nvblox/src/integrators/cuda/projective_color_integrator.cu @@ -0,0 +1,362 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/integrators/projective_color_integrator.h" + +#include "nvblox/integrators/cuda/projective_integrators_common.cuh" +#include "nvblox/integrators/integrators_common.h" +#include "nvblox/utils/timing.h" + +namespace nvblox { + +ProjectiveColorIntegrator::ProjectiveColorIntegrator() + : ProjectiveIntegratorBase() { + sphere_tracer_.params().maximum_ray_length_m = max_integration_distance_m_; + checkCudaErrors(cudaStreamCreate(&integration_stream_)); +} + +ProjectiveColorIntegrator::~ProjectiveColorIntegrator() { + finish(); + checkCudaErrors(cudaStreamDestroy(integration_stream_)); +} + +void ProjectiveColorIntegrator::finish() const { + cudaStreamSynchronize(integration_stream_); +} + +void ProjectiveColorIntegrator::integrateFrame( + const ColorImage& color_frame, const Transform& T_L_C, const Camera& camera, + const TsdfLayer& tsdf_layer, ColorLayer* color_layer, + std::vector* updated_blocks) { + CHECK_NOTNULL(color_layer); + CHECK_EQ(tsdf_layer.block_size(), color_layer->block_size()); + + // Metric truncation distance for this layer + const float voxel_size = + color_layer->block_size() / VoxelBlock::kVoxelsPerSide; + const float truncation_distance_m = truncation_distance_vox_ * voxel_size; + + timing::Timer blocks_in_view_timer("color/integrate/get_blocks_in_view"); + std::vector block_indices = + getBlocksInView(T_L_C, camera, color_layer->block_size()); + blocks_in_view_timer.Stop(); + + // Check which of these blocks are: + // - Allocated in the TSDF, and + // - have at least a single voxel within the truncation band + // This is because: + // - We don't allocate new geometry here, we just color existing geometry + // - We don't color freespace. 
+  timing::Timer blocks_in_band_timer(
+      "color/integrate/reduce_to_blocks_in_band");
+  block_indices = reduceBlocksToThoseInTruncationBand(
+      block_indices, tsdf_layer, truncation_distance_m);
+  blocks_in_band_timer.Stop();
+
+  // Allocate blocks (CPU)
+  // We allocate color blocks where
+  // - there are allocated TSDF blocks, AND
+  // - these blocks are within the truncation band
+  timing::Timer allocate_blocks_timer("color/integrate/allocate_blocks");
+  allocateBlocksWhereRequired(block_indices, color_layer);
+  allocate_blocks_timer.Stop();
+
+  // Create a synthetic depth image
+  timing::Timer sphere_trace_timer("color/integrate/sphere_trace");
+  std::shared_ptr<const DepthImage> synthetic_depth_image_ptr =
+      sphere_tracer_.renderImageOnGPU(
+          camera, T_L_C, tsdf_layer, truncation_distance_m,
+          MemoryType::kDevice, depth_render_ray_subsampling_factor_);
+  sphere_trace_timer.Stop();
+
+  // Update identified blocks
+  // Calls out to the child-class implementing the integration (GPU)
+  timing::Timer update_blocks_timer("color/integrate/update_blocks");
+  updateBlocks(block_indices, color_frame, *synthetic_depth_image_ptr, T_L_C,
+               camera, truncation_distance_m, color_layer);
+  update_blocks_timer.Stop();
+
+  if (updated_blocks != nullptr) {
+    *updated_blocks = block_indices;
+  }
+}
+
+__device__ inline Color blendTwoColors(const Color& first_color,
+                                       float first_weight,
+                                       const Color& second_color,
+                                       float second_weight) {
+  float total_weight = first_weight + second_weight;
+
+  first_weight /= total_weight;
+  second_weight /= total_weight;
+
+  Color new_color;
+  new_color.r = static_cast<uint8_t>(std::round(
+      first_color.r * first_weight + second_color.r * second_weight));
+  new_color.g = static_cast<uint8_t>(std::round(
+      first_color.g * first_weight + second_color.g * second_weight));
+  new_color.b = static_cast<uint8_t>(std::round(
+      first_color.b * first_weight + second_color.b * second_weight));
+
+  return new_color;
+}
+
+__device__ inline bool updateVoxel(const Color color_measured,
+                                   ColorVoxel* voxel_ptr,
+                                   const float voxel_depth_m,
+                                   const float truncation_distance_m,
+                                   const float max_weight) {
+  // NOTE(alexmillane): We integrate all voxels passed to this function. We
+  // should probably not do this. We should not update some based on occlusion
+  // and their distance in the distance field....
+  // TODO(alexmillane): The above.
+
+  // Read CURRENT voxel values (from global GPU memory)
+  const Color voxel_color_current = voxel_ptr->color;
+  const float voxel_weight_current = voxel_ptr->weight;
+  // Fuse
+  constexpr float measurement_weight = 1.0f;
+  const Color fused_color =
+      blendTwoColors(voxel_color_current, voxel_weight_current,
+                     color_measured, measurement_weight);
+  const float weight =
+      fmin(measurement_weight + voxel_weight_current, max_weight);
+  // Write NEW voxel values (to global GPU memory)
+  voxel_ptr->color = fused_color;
+  voxel_ptr->weight = weight;
+  return true;
+}
+
+__global__ void integrateBlocks(
+    const Index3D* block_indices_device_ptr, const Camera camera,
+    const Color* color_image, const int color_rows, const int color_cols,
+    const float* depth_image, const int depth_rows, const int depth_cols,
+    const Transform T_C_L, const float block_size,
+    const float truncation_distance_m, const float max_weight,
+    const float max_integration_distance, const int depth_subsample_factor,
+    ColorBlock** block_device_ptrs) {
+  // Get - the image-space projection of the voxel associated with this thread
+  //     - the depth associated with the projection.
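+  // Illustrative note: this kernel is launched with one CUDA block per
+  // VoxelBlock (see updateBlocks() below), so blockIdx.x selects the voxel
+  // block and threadIdx = (x, y, z) selects a single voxel within it; each
+  // thread projects exactly one voxel into the color image.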
+  Eigen::Vector2f u_px;
+  float voxel_depth_m;
+  if (!projectThreadVoxel(block_indices_device_ptr, camera, T_C_L, block_size,
+                          &u_px, &voxel_depth_m)) {
+    return;
+  }
+
+  // If voxel further away than the limit, skip this voxel
+  if (max_integration_distance > 0.0f) {
+    if (voxel_depth_m > max_integration_distance) {
+      return;
+    }
+  }
+
+  const Eigen::Vector2f u_px_depth =
+      u_px / static_cast<float>(depth_subsample_factor);
+  float surface_depth_m;
+  if (!interpolation::interpolate2DLinear(
+          depth_image, u_px_depth, depth_rows, depth_cols, &surface_depth_m)) {
+    return;
+  }
+
+  // Occlusion testing
+  // Get the distance of the voxel from the rendered surface. If outside
+  // truncation band, skip.
+  const float voxel_distance_from_surface = surface_depth_m - voxel_depth_m;
+  if (fabsf(voxel_distance_from_surface) > truncation_distance_m) {
+    return;
+  }
+
+  Color image_value;
+  if (!interpolation::interpolate2DLinear(color_image, u_px, color_rows,
+                                          color_cols, &image_value)) {
+    return;
+  }
+
+  // Get the Voxel we'll update in this thread
+  // NOTE(alexmillane): Note that we've reversed the voxel indexing order such
+  // that adjacent threads (x-major) access adjacent memory locations in the
+  // block (z-major).
+  ColorVoxel* voxel_ptr =
+      &(block_device_ptrs[blockIdx.x]
+            ->voxels[threadIdx.z][threadIdx.y][threadIdx.x]);
+
+  // Update the voxel using the update rule for this layer type
+  updateVoxel(image_value, voxel_ptr, voxel_depth_m, truncation_distance_m,
+              max_weight);
+}
+
+void ProjectiveColorIntegrator::updateBlocks(
+    const std::vector<Index3D>& block_indices, const ColorImage& color_frame,
+    const DepthImage& depth_frame, const Transform& T_L_C,
+    const Camera& camera, const float truncation_distance_m,
+    ColorLayer* layer_ptr) {
+  CHECK_NOTNULL(layer_ptr);
+  CHECK_EQ(color_frame.rows() % depth_frame.rows(), 0);
+  CHECK_EQ(color_frame.cols() % depth_frame.cols(), 0);
+
+  if (block_indices.empty()) {
+    return;
+  }
+  const int num_blocks = block_indices.size();
+  const int depth_subsampling_factor = color_frame.rows() / depth_frame.rows();
+  CHECK_EQ(color_frame.cols() / depth_frame.cols(), depth_subsampling_factor);
+
+  // Expand the buffers when needed
+  if (num_blocks > block_indices_device_.size()) {
+    constexpr float kBufferExpansionFactor = 1.5f;
+    const int new_size =
+        static_cast<int>(kBufferExpansionFactor * num_blocks);
+    block_indices_device_.reserve(new_size);
+    block_ptrs_device_.reserve(new_size);
+    block_indices_host_.reserve(new_size);
+    block_ptrs_host_.reserve(new_size);
+  }
+
+  // Stage on the host pinned memory
+  block_indices_host_ = block_indices;
+  block_ptrs_host_ = getBlockPtrsFromIndices(block_indices, layer_ptr);
+
+  // Transfer to the device
+  block_indices_device_ = block_indices_host_;
+  block_ptrs_device_ = block_ptrs_host_;
+
+  // We need the inverse transform in the kernel
+  const Transform T_C_L = T_L_C.inverse();
+
+  // Kernel call - One ThreadBlock launched per VoxelBlock
+  constexpr int kVoxelsPerSide = VoxelBlock<ColorVoxel>::kVoxelsPerSide;
+  const dim3 kThreadsPerBlock(kVoxelsPerSide, kVoxelsPerSide, kVoxelsPerSide);
+  const int num_thread_blocks = block_indices.size();
+  // clang-format off
+  integrateBlocks<<<num_thread_blocks, kThreadsPerBlock, 0, integration_stream_>>>(
+      block_indices_device_.data(),
+      camera,
+      color_frame.dataConstPtr(),
+      color_frame.rows(),
+      color_frame.cols(),
+      depth_frame.dataConstPtr(),
+      depth_frame.rows(),
+      depth_frame.cols(),
+      T_C_L,
+      layer_ptr->block_size(),
+      truncation_distance_m,
+      max_weight_,
+      max_integration_distance_m_,
+      depth_subsampling_factor,
+      block_ptrs_device_.data());
+  // clang-format on
+  checkCudaErrors(cudaPeekAtLastError());
+
+  // Finish processing of the frame before returning control
+  finish();
+}
+
+__global__ void checkBlocksInTruncationBand(
+    const VoxelBlock<TsdfVoxel>** block_device_ptrs,
+    const float truncation_distance_m,
+    bool* contains_truncation_band_device_ptr) {
+  // A single thread in each block initializes the output to 0
+  if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0) {
+    contains_truncation_band_device_ptr[blockIdx.x] = 0;
+  }
+  __syncthreads();
+
+  // Get the Voxel we'll check in this thread
+  const TsdfVoxel voxel = block_device_ptrs[blockIdx.x]
+                              ->voxels[threadIdx.z][threadIdx.y][threadIdx.x];
+
+  // If this voxel is in the truncation band, write the flag to say that the
+  // block should be processed.
+  // NOTE(alexmillane): There will be collision on write here. However, from my
+  // reading, all threads' writes will result in a single write to global
+  // memory. Because we only write a single value (1) it doesn't matter which
+  // thread "wins".
+  if (std::abs(voxel.distance) <= truncation_distance_m) {
+    contains_truncation_band_device_ptr[blockIdx.x] = true;
+  }
+}
+
+std::vector<Index3D>
+ProjectiveColorIntegrator::reduceBlocksToThoseInTruncationBand(
+    const std::vector<Index3D>& block_indices, const TsdfLayer& tsdf_layer,
+    const float truncation_distance_m) {
+  // Check 1) Are the blocks allocated
+  // - performed on the CPU because the hash-map is on the CPU
+  std::vector<Index3D> block_indices_check_1;
+  block_indices_check_1.reserve(block_indices.size());
+  for (const Index3D& block_idx : block_indices) {
+    if (tsdf_layer.isBlockAllocated(block_idx)) {
+      block_indices_check_1.push_back(block_idx);
+    }
+  }
+
+  if (block_indices_check_1.empty()) {
+    return block_indices_check_1;
+  }
+
+  // Check 2) Does each of the blocks have a voxel within the truncation band
+  // - performed on the GPU because the blocks are there
+  // Get the blocks we need to check
+  std::vector<const TsdfBlock*> block_ptrs =
+      getBlockPtrsFromIndices(block_indices_check_1, tsdf_layer);
+
+  const int num_blocks = block_ptrs.size();
+
+  // Expand the buffers when needed
+  if (num_blocks > truncation_band_block_ptrs_device_.size()) {
+    constexpr float kBufferExpansionFactor = 1.5f;
+    const int new_size =
+        static_cast<int>(kBufferExpansionFactor * num_blocks);
+    truncation_band_block_ptrs_host_.reserve(new_size);
+    truncation_band_block_ptrs_device_.reserve(new_size);
+    block_in_truncation_band_device_.reserve(new_size);
+    block_in_truncation_band_host_.reserve(new_size);
+  }
+
+  // Host -> Device
+  truncation_band_block_ptrs_host_ = block_ptrs;
+  truncation_band_block_ptrs_device_ = truncation_band_block_ptrs_host_;
+
+  // Prepare output space
+  block_in_truncation_band_device_.resize(num_blocks);
+
+  // Do the check on GPU
+  // Kernel call - One ThreadBlock launched per VoxelBlock
+  constexpr int kVoxelsPerSide = VoxelBlock<TsdfVoxel>::kVoxelsPerSide;
+  const dim3 kThreadsPerBlock(kVoxelsPerSide, kVoxelsPerSide, kVoxelsPerSide);
+  const int num_thread_blocks = num_blocks;
+  // clang-format off
+  checkBlocksInTruncationBand<<<num_thread_blocks, kThreadsPerBlock, 0, integration_stream_>>>(
+      truncation_band_block_ptrs_device_.data(),
+      truncation_distance_m,
+      block_in_truncation_band_device_.data());
+  // clang-format on
+  checkCudaErrors(cudaStreamSynchronize(integration_stream_));
+  checkCudaErrors(cudaPeekAtLastError());
+
+  // Copy results back
+  block_in_truncation_band_host_ = block_in_truncation_band_device_;
+
+  // Filter the indices using the result
+  std::vector<Index3D> block_indices_check_2;
+  block_indices_check_2.reserve(block_indices_check_1.size());
+  for (int i = 0; i < block_indices_check_1.size(); i++) {
+    if (block_in_truncation_band_host_[i] == true) {
+      block_indices_check_2.push_back(block_indices_check_1[i]);
+    }
+  }
+
+  return block_indices_check_2;
+}
+
+}  // namespace nvblox
diff --git a/nvblox/src/integrators/cuda/projective_tsdf_integrator.cu b/nvblox/src/integrators/cuda/projective_tsdf_integrator.cu
new file mode 100644
index 00000000..0c28e1db
--- /dev/null
+++ b/nvblox/src/integrators/cuda/projective_tsdf_integrator.cu
@@ -0,0 +1,215 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "nvblox/integrators/projective_tsdf_integrator.h"
+
+#include "nvblox/core/color.h"
+#include "nvblox/core/cuda/error_check.cuh"
+#include "nvblox/core/interpolation_2d.h"
+#include "nvblox/integrators/cuda/projective_integrators_common.cuh"
+#include "nvblox/integrators/integrators_common.h"
+#include "nvblox/utils/timing.h"
+
+namespace nvblox {
+
+ProjectiveTsdfIntegrator::ProjectiveTsdfIntegrator()
+    : ProjectiveIntegratorBase() {
+  checkCudaErrors(cudaStreamCreate(&integration_stream_));
+}
+
+ProjectiveTsdfIntegrator::~ProjectiveTsdfIntegrator() {
+  finish();
+  checkCudaErrors(cudaStreamDestroy(integration_stream_));
+}
+
+void ProjectiveTsdfIntegrator::finish() const {
+  cudaStreamSynchronize(integration_stream_);
+}
+
+void ProjectiveTsdfIntegrator::integrateFrame(
+    const DepthImage& depth_frame, const Transform& T_L_C,
+    const Camera& camera, TsdfLayer* layer,
+    std::vector<Index3D>* updated_blocks) {
+  CHECK_NOTNULL(layer);
+  timing::Timer tsdf_timer("tsdf/integrate");
+  // Metric truncation distance for this layer
+  const float voxel_size =
+      layer->block_size() / VoxelBlock<TsdfVoxel>::kVoxelsPerSide;
+  const float truncation_distance_m = truncation_distance_vox_ * voxel_size;
+
+  // Identify blocks we can (potentially) see (CPU)
+  timing::Timer blocks_in_view_timer("tsdf/integrate/get_blocks_in_view");
+  const std::vector<Index3D> block_indices = getBlocksInViewUsingRaycasting(
+      depth_frame, T_L_C, camera, layer->block_size());
+  blocks_in_view_timer.Stop();
+
+  // Allocate blocks (CPU)
+  timing::Timer allocate_blocks_timer("tsdf/integrate/allocate_blocks");
+  allocateBlocksWhereRequired(block_indices, layer);
+  allocate_blocks_timer.Stop();
+
+  // Update identified blocks
+  // Calls out to the child-class implementing the integration (CPU or GPU)
+  timing::Timer update_blocks_timer("tsdf/integrate/update_blocks");
+  updateBlocks(block_indices, depth_frame, T_L_C, camera,
+               truncation_distance_m, layer);
+  update_blocks_timer.Stop();
+
+  if (updated_blocks != nullptr) {
+    *updated_blocks = block_indices;
+  }
+}
+
+__device__ inline bool updateVoxel(const float surface_depth_measured,
+                                   TsdfVoxel* voxel_ptr,
+                                   const float voxel_depth_m,
+                                   const float truncation_distance_m,
+                                   const float max_weight) {
+  // Get the MEASURED depth of the VOXEL
+  const float voxel_distance_measured = surface_depth_measured - voxel_depth_m;
+
+  // If we're behind the negative truncation distance, just continue.
+ if (voxel_distance_measured < -truncation_distance_m) { + return false; + } + + // Read CURRENT voxel values (from global GPU memory) + const float voxel_distance_current = voxel_ptr->distance; + const float voxel_weight_current = voxel_ptr->weight; + + // NOTE(alexmillane): We could try to use CUDA math functions to speed up + // below + // https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__SINGLE.html#group__CUDA__MATH__SINGLE + + // Fuse + constexpr float measurement_weight = 1.0f; + float fused_distance = (voxel_distance_measured * measurement_weight + + voxel_distance_current * voxel_weight_current) / + (measurement_weight + voxel_weight_current); + + // Clip + if (fused_distance > 0.0f) { + fused_distance = fmin(truncation_distance_m, fused_distance); + } else { + fused_distance = fmax(-truncation_distance_m, fused_distance); + } + const float weight = + fmin(measurement_weight + voxel_weight_current, max_weight); + + // Write NEW voxel values (to global GPU memory) + voxel_ptr->distance = fused_distance; + voxel_ptr->weight = weight; + return true; +} + +__global__ void integrateBlocks(const Index3D* block_indices_device_ptr, + const Camera camera, const float* image, + int rows, int cols, const Transform T_C_L, + const float block_size, + const float truncation_distance_m, + const float max_weight, + const float max_integration_distance, + TsdfBlock** block_device_ptrs) { + // Get - the image-space projection of the voxel associated with this thread + // - the depth associated with the projection. + Eigen::Vector2f u_px; + float voxel_depth_m; + if (!projectThreadVoxel(block_indices_device_ptr, camera, T_C_L, block_size, + &u_px, &voxel_depth_m)) { + return; + } + + // If voxel further away than the limit, skip this voxel + if (max_integration_distance > 0.0f) { + if (voxel_depth_m > max_integration_distance) { + return; + } + } + + float image_value; + if (!interpolation::interpolate2DLinear(image, u_px, rows, cols, + &image_value)) { + return; + } + + // Get the Voxel we'll update in this thread + // NOTE(alexmillane): Note that we've reverse the voxel indexing order such + // that adjacent threads (x-major) access adjacent memory locations in the + // block (z-major). 
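+  // Illustrative example of the access pattern: threads (0,0,0), (1,0,0),
+  // (2,0,0), ... of a warp read voxels[0][0][0], voxels[0][0][1],
+  // voxels[0][0][2], ..., i.e. consecutive addresses, so the global-memory
+  // reads and writes coalesce.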
+ TsdfVoxel* voxel_ptr = &(block_device_ptrs[blockIdx.x] + ->voxels[threadIdx.z][threadIdx.y][threadIdx.x]); + + // Update the voxel using the update rule for this layer type + updateVoxel(image_value, voxel_ptr, voxel_depth_m, truncation_distance_m, + max_weight); +} + +void ProjectiveTsdfIntegrator::updateBlocks( + const std::vector& block_indices, const DepthImage& depth_frame, + const Transform& T_L_C, const Camera& camera, + const float truncation_distance_m, TsdfLayer* layer_ptr) { + CHECK_NOTNULL(layer_ptr); + + if (block_indices.empty()) { + return; + } + const int num_blocks = block_indices.size(); + + // Expand the buffers when needed + if (num_blocks > block_indices_device_.size()) { + constexpr float kBufferExpansionFactor = 1.5f; + const int new_size = + static_cast(kBufferExpansionFactor * num_blocks); + block_indices_device_.reserve(new_size); + block_ptrs_device_.reserve(new_size); + block_indices_host_.reserve(new_size); + block_ptrs_host_.reserve(new_size); + } + + // Stage on the host pinned memory + block_indices_host_ = block_indices; + block_ptrs_host_ = getBlockPtrsFromIndices(block_indices, layer_ptr); + + // Transfer to the device + block_indices_device_ = block_indices_host_; + block_ptrs_device_ = block_ptrs_host_; + + // We need the inverse transform in the kernel + const Transform T_C_L = T_L_C.inverse(); + + // Kernel call - One ThreadBlock launched per VoxelBlock + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + const dim3 kThreadsPerBlock(kVoxelsPerSide, kVoxelsPerSide, kVoxelsPerSide); + const int num_thread_blocks = num_blocks; + // clang-format off + integrateBlocks<<>>( + block_indices_device_.data(), + camera, + depth_frame.dataConstPtr(), + depth_frame.rows(), + depth_frame.cols(), + T_C_L, + layer_ptr->block_size(), + truncation_distance_m, + max_weight_, + max_integration_distance_m_, + block_ptrs_device_.data()); + // clang-format on + checkCudaErrors(cudaPeekAtLastError()); + + // Finish processing of the frame before returning control + finish(); +} + +} // namespace nvblox diff --git a/nvblox/src/integrators/esdf_integrator.cpp b/nvblox/src/integrators/esdf_integrator.cpp new file mode 100644 index 00000000..7311e8a8 --- /dev/null +++ b/nvblox/src/integrators/esdf_integrator.cpp @@ -0,0 +1,830 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/core/hash.h" +#include "nvblox/utils/timing.h" + +#include "nvblox/integrators/esdf_integrator.h" + +namespace nvblox { + +// Integrate the entire layer. +void EsdfIntegrator::integrateLayer(const TsdfLayer& tsdf_layer, + EsdfLayer* esdf_layer) { + std::vector block_indices = tsdf_layer.getAllBlockIndices(); + constexpr DeviceType device_type = DeviceType::kGPU; + if (device_type == DeviceType::kGPU) { + integrateBlocksOnGPU(tsdf_layer, block_indices, esdf_layer); + } else { + integrateBlocksOnCPU(tsdf_layer, block_indices, esdf_layer); + } +} + +// Integrate specific blocks in a layer. 
+void EsdfIntegrator::integrateBlocks(const TsdfLayer& tsdf_layer,
+                                     const std::vector<Index3D>& block_indices,
+                                     EsdfLayer* esdf_layer) {
+  integrateBlocksOnGPU(tsdf_layer, block_indices, esdf_layer);
+}
+
+void EsdfIntegrator::integrateBlocksOnCPU(
+    const TsdfLayer& tsdf_layer, const std::vector<Index3D>& block_indices,
+    EsdfLayer* esdf_layer) {
+  timing::Timer esdf_timer("esdf/integrate");
+
+  timing::Timer allocate_timer("esdf/integrate/allocate");
+  // First, allocate all the destination blocks.
+  allocateBlocksOnCPU(block_indices, esdf_layer);
+  allocate_timer.Stop();
+
+  timing::Timer mark_timer("esdf/integrate/mark_sites");
+  // Then, mark all the sites on CPU.
+  // This finds all the blocks that are eligible to be parents.
+  std::vector<Index3D> blocks_with_sites;
+  std::vector<Index3D> blocks_to_clear;
+  markAllSitesOnCPU(tsdf_layer, block_indices, esdf_layer, &blocks_with_sites,
+                    &blocks_to_clear);
+  mark_timer.Stop();
+
+  timing::Timer clear_timer("esdf/integrate/clear_invalid");
+  std::vector<Index3D> cleared_blocks;
+
+  clearInvalidOnCPU(blocks_to_clear, esdf_layer, &cleared_blocks);
+
+  VLOG(3) << "Invalid blocks cleared: " << cleared_blocks.size();
+  clear_timer.Stop();
+
+  timing::Timer compute_timer("esdf/integrate/compute");
+  // Parallel block banding on CPU.
+  // First we call all the blocks with sites to propagate the distances out
+  // from the sites.
+  computeEsdfOnCPU(blocks_with_sites, esdf_layer);
+  // In case some blocks were cleared (otherwise this is a no-op), we also need
+  // to make sure any cleared blocks get their values updated. This simply
+  // ensures that their values are up-to-date.
+  // TODO(helen): check if we can just roll cleared blocks into the list with
+  // blocks with sites and only call it once. Compare how the speed varies.
+  computeEsdfOnCPU(cleared_blocks, esdf_layer);
+  compute_timer.Stop();
+}
+
+void EsdfIntegrator::allocateBlocksOnCPU(
+    const std::vector<Index3D>& block_indices, EsdfLayer* esdf_layer) {
+  // We want to allocate all ESDF layer blocks and copy over the sites.
+  for (const Index3D& block_index : block_indices) {
+    esdf_layer->allocateBlockAtIndex(block_index);
+  }
+}
+
+void EsdfIntegrator::markAllSitesOnCPU(
+    const TsdfLayer& tsdf_layer, const std::vector<Index3D>& block_indices,
+    EsdfLayer* esdf_layer, std::vector<Index3D>* blocks_with_sites,
+    std::vector<Index3D>* blocks_to_clear) {
+  CHECK_NOTNULL(esdf_layer);
+  CHECK_NOTNULL(blocks_with_sites);
+
+  constexpr int kVoxelsPerSide = VoxelBlock<TsdfVoxel>::kVoxelsPerSide;
+  const float voxel_size = tsdf_layer.block_size() / kVoxelsPerSide;
+  const float max_distance_vox = max_distance_m_ / voxel_size;
+  const float max_squared_distance_vox = max_distance_vox * max_distance_vox;
+
+  // Cache the minimum distance in metric size.
+ const float min_site_distance_m = min_site_distance_vox_ * voxel_size; + int num_observed = 0; + for (const Index3D& block_index : block_indices) { + EsdfBlock::Ptr esdf_block = esdf_layer->getBlockAtIndex(block_index); + TsdfBlock::ConstPtr tsdf_block = tsdf_layer.getBlockAtIndex(block_index); + + if (!esdf_block || !tsdf_block) { + LOG(ERROR) << "Somehow trying to update non-existent blocks!"; + continue; + } + + bool block_has_sites = false; + bool has_observed = false; + bool block_to_clear = false; + // Iterate over all the voxels: + Index3D voxel_index; + for (voxel_index.x() = 0; voxel_index.x() < kVoxelsPerSide; + voxel_index.x()++) { + for (voxel_index.y() = 0; voxel_index.y() < kVoxelsPerSide; + voxel_index.y()++) { + for (voxel_index.z() = 0; voxel_index.z() < kVoxelsPerSide; + voxel_index.z()++) { + // Get the voxel and call the callback on it: + const TsdfVoxel* tsdf_voxel = + &tsdf_block + ->voxels[voxel_index.x()][voxel_index.y()][voxel_index.z()]; + EsdfVoxel* esdf_voxel = + &esdf_block + ->voxels[voxel_index.x()][voxel_index.y()][voxel_index.z()]; + if (tsdf_voxel->weight >= min_weight_) { + esdf_voxel->observed = true; + // Mark as inside if the voxel distance is negative. + has_observed = true; + bool now_inside = tsdf_voxel->distance <= 0; + if (esdf_voxel->is_inside && now_inside == false) { + block_to_clear = true; + esdf_voxel->squared_distance_vox = max_squared_distance_vox; + esdf_voxel->parent_direction.setZero(); + } + esdf_voxel->is_inside = now_inside; + if (now_inside && + std::abs(tsdf_voxel->distance) <= min_site_distance_m) { + esdf_voxel->is_site = true; + esdf_voxel->squared_distance_vox = 0.0f; + esdf_voxel->parent_direction.setZero(); + block_has_sites = true; + } else { + if (esdf_voxel->is_site) { + esdf_voxel->is_site = false; + block_to_clear = true; + // This block needs to be cleared since something became an + // un-site. + esdf_voxel->squared_distance_vox = max_squared_distance_vox; + esdf_voxel->parent_direction.setZero(); + // Other case is a brand new voxel. + } else if (esdf_voxel->squared_distance_vox <= 0.0f) { + esdf_voxel->squared_distance_vox = max_squared_distance_vox; + esdf_voxel->parent_direction.setZero(); + } + } + } + } + } + } + if (block_has_sites) { + blocks_with_sites->push_back(block_index); + } + if (block_to_clear) { + blocks_to_clear->push_back(block_index); + } + if (has_observed) { + num_observed++; + } + } +} + +void EsdfIntegrator::clearInvalidOnCPU( + const std::vector& blocks_to_clear, EsdfLayer* esdf_layer, + std::vector* updated_blocks) { + CHECK_NOTNULL(esdf_layer); + CHECK_NOTNULL(updated_blocks); + + updated_blocks->clear(); + Index3DSet all_cleared_blocks; + + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + const float voxel_size = esdf_layer->block_size() / kVoxelsPerSide; + const float max_distance_vox = max_distance_m_ / voxel_size; + const float max_squared_distance_vox = max_distance_vox * max_distance_vox; + + Index3DSet neighbor_clearing_set; + + int cleared_voxels = 0; + + // Ok so the goal is to basically check the neighbors along the edges, + // and then continue sweeping if necessary. + + // Assumptions: all blocks with sites are correct now (they got updated + // earlier). If a block contains an invalid site, it is also present along the + // edge. 
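+  // Concrete example (an illustration, assuming the usual parent-chain
+  // semantics): if a site voxel S was removed this frame, every voxel whose
+  // parent chain terminates at S now stores a stale distance. The loop below
+  // resets such voxels inside each flagged block to max_squared_distance_vox;
+  // the neighbor sweep afterwards chases chains that cross block boundaries.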
+ for (const Index3D& block_index : blocks_to_clear) { + VLOG(3) << "Clearing WITHIN block: " << block_index.transpose(); + + EsdfBlock::Ptr esdf_block = esdf_layer->getBlockAtIndex(block_index); + if (!esdf_block) { + continue; + } + bool any_cleared = false; + // First we just reset all the voxels within the block that point to + // non-sites. + Index3D voxel_index; + for (voxel_index.x() = 0; voxel_index.x() < kVoxelsPerSide; + voxel_index.x()++) { + for (voxel_index.y() = 0; voxel_index.y() < kVoxelsPerSide; + voxel_index.y()++) { + for (voxel_index.z() = 0; voxel_index.z() < kVoxelsPerSide; + voxel_index.z()++) { + EsdfVoxel* esdf_voxel = + &esdf_block + ->voxels[voxel_index.x()][voxel_index.y()][voxel_index.z()]; + if (!esdf_voxel->observed || esdf_voxel->is_site || + esdf_voxel->squared_distance_vox >= max_squared_distance_vox) { + continue; + } + + // Get the parent. + Index3D parent_index = voxel_index + esdf_voxel->parent_direction; + + // Check if the voxel is within the same block. + if (parent_index.x() < 0 || parent_index.x() >= kVoxelsPerSide || + parent_index.y() < 0 || parent_index.y() >= kVoxelsPerSide || + parent_index.z() < 0 || parent_index.z() >= kVoxelsPerSide) { + continue; + } + + // Ok check if the parent index is a site. + if (!esdf_block + ->voxels[parent_index.x()][parent_index.y()] + [parent_index.z()] + .is_site) { + clearVoxel(esdf_voxel, max_squared_distance_vox); + cleared_voxels++; + any_cleared = true; + } + } + } + } + + // Iterate over all the neighbors. + // If any of the neighbors reach the edge in clearing, we need to continue + // updating neighbors. + if (any_cleared) { + neighbor_clearing_set.insert(block_index); + all_cleared_blocks.insert(block_index); + } + } + // Ok after we're done with everything, also update any neighbors that need + // clearing. + Index3DSet new_neighbor_clearing_set; + + while (!neighbor_clearing_set.empty()) { + for (const Index3D& block_index : neighbor_clearing_set) { + VLOG(3) << "Clearing block: " << block_index.transpose(); + clearNeighborsOnCPU(block_index, esdf_layer, &new_neighbor_clearing_set); + all_cleared_blocks.insert(block_index); + } + VLOG(3) << "Num neighbors to clear: " << new_neighbor_clearing_set.size(); + std::swap(new_neighbor_clearing_set, neighbor_clearing_set); + new_neighbor_clearing_set.clear(); + } + + VLOG(3) << "Updated voxels by clearing: " << cleared_voxels; + + std::copy(all_cleared_blocks.begin(), all_cleared_blocks.end(), + std::back_inserter(*updated_blocks)); +} + +// This function was written for testing -- rather than incrementally searching +// for cleared blocks from an initial seed lsit, this iterates over the entire +// map and checks the parent of every single voxel. This cannot be parallelized +// without having the whole hash table on GPU, which is why we went with the +// other method for parallelization. +void EsdfIntegrator::clearAllInvalidOnCPU( + EsdfLayer* esdf_layer, std::vector* updated_blocks) { + // Go through all blocks in the map, clearing any that point to an invalid + // parent. 
+ std::vector block_indices = esdf_layer->getAllBlockIndices(); + + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + const float voxel_size = esdf_layer->block_size() / kVoxelsPerSide; + const float max_distance_vox = max_distance_m_ / voxel_size; + const float max_squared_distance_vox = max_distance_vox * max_distance_vox; + + Index3DSet neighbor_clearing_set; + + int cleared_voxels = 0; + + // Ok so the goal is to basically check the neighbors along the edges, + // and then continue sweeping if necessary. + + // Assumptions: all blocks with sites are correct now (they got updated + // earlier). If a block contains an invalid site, it is also present along the + // edge. + for (const Index3D& block_index : block_indices) { + EsdfBlock::Ptr esdf_block = esdf_layer->getBlockAtIndex(block_index); + if (!esdf_block) { + continue; + } + bool any_cleared = false; + + Index3D voxel_index; + for (voxel_index.x() = 0; voxel_index.x() < kVoxelsPerSide; + voxel_index.x()++) { + for (voxel_index.y() = 0; voxel_index.y() < kVoxelsPerSide; + voxel_index.y()++) { + for (voxel_index.z() = 0; voxel_index.z() < kVoxelsPerSide; + voxel_index.z()++) { + EsdfVoxel* esdf_voxel = + &esdf_block + ->voxels[voxel_index.x()][voxel_index.y()][voxel_index.z()]; + if (!esdf_voxel->observed || esdf_voxel->is_site) { + continue; + } + + // Get the parent. + Index3D parent_index = voxel_index + esdf_voxel->parent_direction; + + if (esdf_voxel->parent_direction == Index3D::Zero()) { + continue; + } + + // Check if the voxel is within the same block. + if (parent_index.x() < 0 || parent_index.x() >= kVoxelsPerSide || + parent_index.y() < 0 || parent_index.y() >= kVoxelsPerSide || + parent_index.z() < 0 || parent_index.z() >= kVoxelsPerSide) { + // Then we need to get the block index. + Index3D neighbor_block_index = block_index; + Index3D neighbor_voxel_index = parent_index; + + // Find the parent index. + while (parent_index.x() >= kVoxelsPerSide) { + parent_index.x() -= kVoxelsPerSide; + neighbor_block_index.x()++; + } + while (parent_index.y() >= kVoxelsPerSide) { + parent_index.y() -= kVoxelsPerSide; + neighbor_block_index.y()++; + } + while (parent_index.z() >= kVoxelsPerSide) { + parent_index.z() -= kVoxelsPerSide; + neighbor_block_index.z()++; + } + + while (parent_index.x() < 0) { + parent_index.x() += kVoxelsPerSide; + neighbor_block_index.x()--; + } + while (parent_index.y() < 0) { + parent_index.y() += kVoxelsPerSide; + neighbor_block_index.y()--; + } + while (parent_index.z() < 0) { + parent_index.z() += kVoxelsPerSide; + neighbor_block_index.z()--; + } + + EsdfBlock::ConstPtr neighbor_block = + esdf_layer->getBlockAtIndex(neighbor_block_index); + const EsdfVoxel* neighbor_voxel = + &neighbor_block->voxels[parent_index.x()][parent_index.y()] + [parent_index.z()]; + if (!neighbor_voxel->is_site) { + clearVoxel(esdf_voxel, max_squared_distance_vox); + cleared_voxels++; + any_cleared = true; + } + } else { + // Ok check if the parent index is a site. 
+ if (!esdf_block + ->voxels[parent_index.x()][parent_index.y()] + [parent_index.z()] + .is_site) { + clearVoxel(esdf_voxel, max_squared_distance_vox); + + cleared_voxels++; + any_cleared = true; + } + } + } + } + } + if (any_cleared) { + updated_blocks->push_back(block_index); + } + } + VLOG(3) << "Cleared voxels in batch: " << cleared_voxels; +} + +void EsdfIntegrator::clearNeighborsOnCPU(const Index3D& block_index, + EsdfLayer* esdf_layer, + Index3DSet* neighbor_clearing_set) { + CHECK_NOTNULL(esdf_layer); + CHECK_NOTNULL(neighbor_clearing_set); + + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + const float voxel_size = esdf_layer->block_size() / kVoxelsPerSide; + const float max_distance_vox = max_distance_m_ / voxel_size; + const float max_squared_distance_vox = max_distance_vox * max_distance_vox; + constexpr int kNumSweepDir = 3; + + int cleared_voxels = 0; + + EsdfBlock::Ptr esdf_block = esdf_layer->getBlockAtIndex(block_index); + if (!esdf_block) { + LOG(ERROR) << "For some reason trying to clear an unallocated block."; + return; + } + + Index3D neighbor_index = block_index; + for (int axis_to_sweep = 0; axis_to_sweep < kNumSweepDir; axis_to_sweep++) { + for (int direction = -1; direction <= 1; direction += 2) { + // Get this neighbor. + bool block_updated = false; + neighbor_index = block_index; + neighbor_index(axis_to_sweep) += direction; + + EsdfBlock::Ptr neighbor_block = + esdf_layer->getBlockAtIndex(neighbor_index); + if (!neighbor_block) { + continue; + } + + VLOG(3) << "Block index: " << block_index.transpose() + << " Neighbor index: " << neighbor_index.transpose() + << " axis: " << axis_to_sweep << " dir: " << direction; + + // Iterate over the axes per neighbor. + // First we look negative. + int voxel_index = 0; + int opposite_voxel_index = kVoxelsPerSide; + // Then we look positive. + if (direction > 0) { + voxel_index = kVoxelsPerSide - 1; + opposite_voxel_index = -1; + } + + Index3D index = Index3D::Zero(); + index(axis_to_sweep) = voxel_index; + int first_axis = 0, second_axis = 1; + getSweepAxes(axis_to_sweep, &first_axis, &second_axis); + for (index(first_axis) = 0; index(first_axis) < kVoxelsPerSide; + index(first_axis)++) { + for (index(second_axis) = 0; index(second_axis) < kVoxelsPerSide; + index(second_axis)++) { + // This is the closest ESDF voxel in our block to this guy. + const EsdfVoxel* esdf_voxel = + &esdf_block->voxels[index.x()][index.y()][index.z()]; + + // If this isn't a cleared voxel, don't bother. + if (/*!esdf_voxel->observed || */ + esdf_voxel->parent_direction != Index3D::Zero()) { + continue; + } + + for (int increment = 1; increment <= kVoxelsPerSide; increment++) { + // Basically we just want to check if the edges' neighbors point + // to something valid. + Index3D neighbor_voxel_index = index; + // Select the opposite voxel index. + neighbor_voxel_index(axis_to_sweep) = + opposite_voxel_index + increment * direction; + EsdfVoxel* neighbor_voxel = + &neighbor_block->voxels[neighbor_voxel_index.x()] + [neighbor_voxel_index.y()] + [neighbor_voxel_index.z()]; + + // If either this was never set or is a site itself, we don't + // care. 
+ if (!neighbor_voxel->observed || neighbor_voxel->is_site || + neighbor_voxel->squared_distance_vox >= + max_squared_distance_vox) { + VLOG(3) << "Skipping because the voxel isn't updateable."; + continue; + } + VLOG(3) << "Block index: " << block_index.transpose() + << " voxel index: " << index.transpose() + << " Neighbor voxel index: " + << neighbor_voxel_index.transpose() + << " Distance: " << neighbor_voxel->squared_distance_vox + << " Parent direction: " + << neighbor_voxel->parent_direction.transpose() + << " Sweep axis: " << axis_to_sweep + << " direction: " << direction + << " increment: " << increment; + + // If the direction of this neighbor isn't pointing in our + // direction, we also don't care. + Index3D parent_voxel_dir = neighbor_voxel->parent_direction; + if ((direction > 0 && parent_voxel_dir(axis_to_sweep) >= 0) || + (direction < 0 && parent_voxel_dir(axis_to_sweep) <= 0)) { + VLOG(3) << "Skipping because direction doesn't match. Parent " + "voxel dir: " + << parent_voxel_dir(axis_to_sweep) + << " Axis: " << axis_to_sweep + << " direction: " << direction; + break; + } + + // If it is, and the matching ESDF voxel parent doesn't match, + // then it needs to be cleared. + // Actually we can just clear it if the neighbor is cleared. + clearVoxel(neighbor_voxel, max_squared_distance_vox); + block_updated = true; + cleared_voxels++; + VLOG(3) << "Clearing voxel."; + + // We need to continue sweeping in this direction to clear the + // rest of the band. Luckily this is already in the structure of + // the for loop. + } + } + } + if (block_updated) { + neighbor_clearing_set->insert(neighbor_index); + } + } + } + VLOG(3) << "Updated neighbor voxels by clearing: " << cleared_voxels + << " updated blocks: " << neighbor_clearing_set->size(); +} + +void EsdfIntegrator::computeEsdfOnCPU( + const std::vector& blocks_with_sites, EsdfLayer* esdf_layer) { + CHECK_NOTNULL(esdf_layer); + + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + const float voxel_size = esdf_layer->block_size() / kVoxelsPerSide; + + // First we go over all of the blocks with sites. + // We compute all the proximal sites inside the block first. + for (const Index3D& block_index : blocks_with_sites) { + EsdfBlock::Ptr esdf_block = esdf_layer->getBlockAtIndex(block_index); + for (int i = 0; i < 3; i++) { + sweepBlockBandOnCPU(i, voxel_size, esdf_block.get()); + } + } + + std::vector updated_blocks; + std::vector blocks_to_run = blocks_with_sites; + int i = 0; + do { + updated_blocks.clear(); + // We then propagate the edges of these updated blocks to their immediate + // neighbors. + for (const Index3D& block_index : blocks_to_run) { + updateLocalNeighborBandsOnCPU(block_index, esdf_layer, &updated_blocks); + } + + // Then we update the inside values of these neighbors. + for (const Index3D& block_index : updated_blocks) { + EsdfBlock::Ptr esdf_block = esdf_layer->getBlockAtIndex(block_index); + for (int i = 0; i < 3; i++) { + sweepBlockBandOnCPU(i, voxel_size, esdf_block.get()); + } + } + Index3DSet updated_set(updated_blocks.begin(), updated_blocks.end()); + blocks_to_run.assign(updated_set.begin(), updated_set.end()); + VLOG(3) << "Update " << i << " updated blocks: " << blocks_to_run.size(); + i++; + // We continue updating in a brushfire pattern until no more blocks are + // getting updated. 
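+    // For example: a new site in block (0,0,0) first updates that block's
+    // interior, pass 1 then pushes its edge values into the six face
+    // neighbors, pass 2 into their neighbors, and so on until a pass leaves
+    // updated_blocks empty.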
+ } while (!updated_blocks.empty()); +} + +void EsdfIntegrator::updateLocalNeighborBandsOnCPU( + const Index3D& block_index, EsdfLayer* esdf_layer, + std::vector* updated_blocks) { + constexpr int kNumSweepDir = 3; + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + constexpr float kAlmostZero = 1e-4; + const float voxel_size = esdf_layer->block_size() / kVoxelsPerSide; + const float max_distance_vox = max_distance_m_ / voxel_size; + const float max_squared_distance_vox = max_distance_vox * max_distance_vox; + + // Get the center block. + EsdfBlock::Ptr esdf_block = esdf_layer->getBlockAtIndex(block_index); + if (!esdf_block) { + return; + } + + // Iterate over all the neighbors. + Index3D neighbor_index = block_index; + for (int axis_to_sweep = 0; axis_to_sweep < kNumSweepDir; axis_to_sweep++) { + for (int direction = -1; direction <= 1; direction += 2) { + // Get this neighbor. + bool block_updated = false; + neighbor_index = block_index; + neighbor_index(axis_to_sweep) += direction; + + EsdfBlock::Ptr neighbor_block = + esdf_layer->getBlockAtIndex(neighbor_index); + if (!neighbor_block) { + continue; + } + + // Iterate over the axes per neighbor. + // First we look negative. + int voxel_index = 0; + int opposite_voxel_index = kVoxelsPerSide - 1; + // Then we look positive. + if (direction > 0) { + voxel_index = opposite_voxel_index; + opposite_voxel_index = 0; + } + + Index3D index = Index3D::Zero(); + index(axis_to_sweep) = voxel_index; + int first_axis = 0, second_axis = 1; + getSweepAxes(axis_to_sweep, &first_axis, &second_axis); + for (index(first_axis) = 0; index(first_axis) < kVoxelsPerSide; + index(first_axis)++) { + for (index(second_axis) = 0; index(second_axis) < kVoxelsPerSide; + index(second_axis)++) { + // This is the bottom interface. So pixel 0 of our block should be + // pixel kVoxelsPerSide-1 of the search block. + const EsdfVoxel* esdf_voxel = + &esdf_block->voxels[index.x()][index.y()][index.z()]; + Index3D neighbor_voxel_index = index; + // Select the opposite voxel index. + neighbor_voxel_index(axis_to_sweep) = opposite_voxel_index; + EsdfVoxel* neighbor_voxel = + &neighbor_block + ->voxels[neighbor_voxel_index.x()][neighbor_voxel_index.y()] + [neighbor_voxel_index.z()]; + + if (!esdf_voxel->observed || !neighbor_voxel->observed || + neighbor_voxel->is_site || + esdf_voxel->squared_distance_vox >= max_squared_distance_vox) { + continue; + } + // Determine if we can update this. + Eigen::Vector3i potential_direction = esdf_voxel->parent_direction; + potential_direction(axis_to_sweep) += -direction; + float potential_distance = potential_direction.squaredNorm(); + if (neighbor_voxel->squared_distance_vox > potential_distance) { + neighbor_voxel->parent_direction = potential_direction; + neighbor_voxel->squared_distance_vox = potential_distance; + block_updated = true; + } + } + } + if (block_updated) { + updated_blocks->push_back(neighbor_index); + } + } + } +} + +void EsdfIntegrator::propagateEdgesOnCPU(int axis_to_sweep, float voxel_size, + EsdfBlock* esdf_block) { + CHECK_NOTNULL(esdf_block); + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + constexpr float kAlmostZero = 1e-4; + const float max_distance_vox = max_distance_m_ / voxel_size; + const float max_squared_distance_vox = max_distance_vox * max_distance_vox; + + // Select the axes to increment. + int first_axis = 0, second_axis = 1; + getSweepAxes(axis_to_sweep, &first_axis, &second_axis); + + // Just need to sweep once through. 
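+  // Sketch of the update rule: for a band voxel at position k along the
+  // sweep axis, the candidate parent through the bottom edge (k = 0) is
+  // (bottom_parent_relative - index). E.g. if the bottom voxel's parent sits
+  // two voxels below the block boundary, the voxel at k = 3 sees a candidate
+  // squared distance of (3 + 2)^2 = 25 vox^2 along that axis (plus any
+  // lateral offset), and takes it only if it beats its current value.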
+  Index3D index = Index3D::Zero();
+  for (index(first_axis) = 0; index(first_axis) < kVoxelsPerSide;
+       index(first_axis)++) {
+    for (index(second_axis) = 0; index(second_axis) < kVoxelsPerSide;
+         index(second_axis)++) {
+      // Get the min and max of the band.
+      Index3D bottom_index = index;
+      bottom_index(axis_to_sweep) = 0;
+      Index3D top_index = index;
+      top_index(axis_to_sweep) = kVoxelsPerSide - 1;
+      EsdfVoxel* bottom_voxel =
+          &esdf_block
+               ->voxels[bottom_index.x()][bottom_index.y()][bottom_index.z()];
+      EsdfVoxel* top_voxel =
+          &esdf_block->voxels[top_index.x()][top_index.y()][top_index.z()];
+      Eigen::Vector3i bottom_parent_relative = bottom_voxel->parent_direction;
+      bottom_parent_relative += bottom_index;
+      Eigen::Vector3i top_parent_relative = top_voxel->parent_direction;
+      top_parent_relative += top_index;
+
+      // Check whether the band end voxels are actually valid (observed and
+      // within the maximum distance).
+      bool top_valid =
+          top_voxel->observed &&
+          top_voxel->squared_distance_vox < max_squared_distance_vox;
+      bool bottom_valid =
+          bottom_voxel->observed &&
+          bottom_voxel->squared_distance_vox < max_squared_distance_vox;
+
+      for (index(axis_to_sweep) = 0; index(axis_to_sweep) < kVoxelsPerSide;
+           index(axis_to_sweep)++) {
+        EsdfVoxel* esdf_voxel =
+            &esdf_block->voxels[index.x()][index.y()][index.z()];
+        if (!esdf_voxel->observed || esdf_voxel->is_site) {
+          continue;
+        }
+        // Check if we should update this with either the top or the bottom
+        // voxel.
+        if (bottom_valid) {
+          Eigen::Vector3i potential_direction = bottom_parent_relative - index;
+          float potential_distance = potential_direction.squaredNorm();
+          if (esdf_voxel->squared_distance_vox > potential_distance) {
+            esdf_voxel->parent_direction = potential_direction;
+            esdf_voxel->squared_distance_vox = potential_distance;
+          }
+        }
+        if (top_valid) {
+          Eigen::Vector3i potential_direction = top_parent_relative - index;
+          float potential_distance = potential_direction.squaredNorm();
+          if (esdf_voxel->squared_distance_vox > potential_distance) {
+            esdf_voxel->parent_direction = potential_direction;
+            esdf_voxel->squared_distance_vox = potential_distance;
+          }
+        }
+      }
+    }
+  }
+}
+
+void EsdfIntegrator::sweepBlockBandOnCPU(int axis_to_sweep, float voxel_size,
+                                         EsdfBlock* esdf_block) {
+  constexpr int kVoxelsPerSide = VoxelBlock<EsdfVoxel>::kVoxelsPerSide;
+  const float max_distance_vox = max_distance_m_ / voxel_size;
+  const float max_squared_distance_vox = max_distance_vox * max_distance_vox;
+
+  // Select the axes to increment.
+  int first_axis = 0, second_axis = 1;
+  getSweepAxes(axis_to_sweep, &first_axis, &second_axis);
+
+  Index3D index = Index3D::Zero();
+
+  // Keep track of whether we've seen any sites so far.
+  // The secondary pass has "fake" sites; i.e., sites outside of the block.
+  Index3D last_site = Index3D::Zero();
+  bool site_found = false;
+  for (index(first_axis) = 0; index(first_axis) < kVoxelsPerSide;
+       index(first_axis)++) {
+    for (index(second_axis) = 0; index(second_axis) < kVoxelsPerSide;
+         index(second_axis)++) {
+      // First we sweep forward, then backwards.
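+      // 1-D example along the sweep axis (8 voxels per side, all non-site
+      // voxels observed but still at the max distance): with a single site
+      // at k = 2, the forward pass assigns k = 3..7 squared distances
+      // 1, 4, 9, 16, 25, and the backward pass then fills k = 1 and k = 0
+      // with 1 and 4.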
+ for (int i = 0; i < 2; i++) { + last_site = Index3D::Zero(); + site_found = false; + int direction = 1; + int start_voxel = 0; + int end_voxel = kVoxelsPerSide; + if (i == 1) { + direction = -1; + start_voxel = kVoxelsPerSide - 1; + end_voxel = -1; + } + for (index(axis_to_sweep) = start_voxel; + index(axis_to_sweep) != end_voxel; + index(axis_to_sweep) += direction) { + EsdfVoxel* esdf_voxel = + &esdf_block->voxels[index.x()][index.y()][index.z()]; + if (!esdf_voxel->observed) { + continue; + } + // If this voxel is itself a site, then mark this for future voxels. + if (esdf_voxel->is_site) { + last_site = index; + site_found = true; + } else if (!site_found) { + // If this voxel isn't a site but we haven't found a site yet, + // then if this voxel is valid we set it as the site. + if (esdf_voxel->squared_distance_vox < max_squared_distance_vox) { + site_found = true; + last_site = esdf_voxel->parent_direction + index; + } + } else { + // If we've found the site, then should just decide what to do + // here. + Index3D potential_direction = last_site - index; + float potential_distance = potential_direction.squaredNorm(); + // Either it hasn't been set at all or it's closer to the site + // than to its current value. + if (esdf_voxel->squared_distance_vox > potential_distance) { + esdf_voxel->parent_direction = potential_direction; + esdf_voxel->squared_distance_vox = potential_distance; + } else if (esdf_voxel->squared_distance_vox < + max_squared_distance_vox) { + // If the current value is a better site, then set it as a site. + last_site = esdf_voxel->parent_direction + index; + } + } + } + } + } + } +} + +void EsdfIntegrator::getSweepAxes(int axis_to_sweep, int* first_axis, + int* second_axis) const { + // Pick an order of the axes to sweep. + switch (axis_to_sweep) { + case 0: + *first_axis = 1; + *second_axis = 2; + break; + case 1: + *first_axis = 0; + *second_axis = 2; + break; + case 2: + *first_axis = 0; + *second_axis = 1; + break; + } +} + +void EsdfIntegrator::clearVoxel(EsdfVoxel* voxel, + float max_squared_distance_vox) { + voxel->parent_direction.setZero(); + voxel->squared_distance_vox = max_squared_distance_vox; +} + +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/src/integrators/frustum.cpp b/nvblox/src/integrators/frustum.cpp new file mode 100644 index 00000000..70f86dd0 --- /dev/null +++ b/nvblox/src/integrators/frustum.cpp @@ -0,0 +1,61 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#include "nvblox/integrators/frustum.h" + +#include "nvblox/core/bounding_boxes.h" + +namespace nvblox { + +std::vector FrustumCalculator::getBlocksInView( + const Transform& T_L_C, const Camera& camera, const float block_size, + const float max_distance) { + CHECK_GT(max_distance, 0.0f); + + // View frustum + const Frustum frustum = camera.getViewFrustum(T_L_C, 0.0f, max_distance); + + // Coarse bound: AABB + const AxisAlignedBoundingBox aabb_L = frustum.getAABB(); + std::vector block_indices_in_aabb = + getBlockIndicesTouchedByBoundingBox(block_size, aabb_L); + + // Tight bound: View frustum + std::vector block_indices_in_frustum; + for (const Index3D& block_index : block_indices_in_aabb) { + const AxisAlignedBoundingBox& aabb_block = + getAABBOfBlock(block_size, block_index); + if (frustum.isAABBInView(aabb_block)) { + block_indices_in_frustum.push_back(block_index); + } + } + return block_indices_in_frustum; +} + +std::vector FrustumCalculator::getBlocksInImageView( + const DepthImage& depth_frame, const Transform& T_L_C, const Camera& camera, + const float block_size, const float truncation_distance_m, + const float max_integration_distance_m) { + float min_depth, max_depth; + std::tie(min_depth, max_depth) = image::minmaxGPU(depth_frame); + float max_depth_plus_trunc = max_depth + truncation_distance_m; + if (max_integration_distance_m > 0.0f) { + max_depth_plus_trunc = + std::min(max_depth_plus_trunc, max_integration_distance_m); + } + return getBlocksInView(T_L_C, camera, block_size, max_depth_plus_trunc); +} + +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/src/integrators/projective_integrator_base.cpp b/nvblox/src/integrators/projective_integrator_base.cpp new file mode 100644 index 00000000..5e5dd7ac --- /dev/null +++ b/nvblox/src/integrators/projective_integrator_base.cpp @@ -0,0 +1,78 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+#include "nvblox/integrators/projective_integrator_base.h"
+
+#include "nvblox/core/bounding_boxes.h"
+
+namespace nvblox {
+
+float ProjectiveIntegratorBase::truncation_distance_vox() const {
+  return truncation_distance_vox_;
+}
+
+float ProjectiveIntegratorBase::max_weight() const { return max_weight_; }
+
+float ProjectiveIntegratorBase::max_integration_distance_m() const {
+  return max_integration_distance_m_;
+}
+
+void ProjectiveIntegratorBase::truncation_distance_vox(
+    float truncation_distance_vox) {
+  CHECK_GT(truncation_distance_vox, 0.0f);
+  truncation_distance_vox_ = truncation_distance_vox;
+}
+
+void ProjectiveIntegratorBase::max_weight(float max_weight) {
+  CHECK_GT(max_weight, 0.0f);
+  max_weight_ = max_weight;
+}
+
+void ProjectiveIntegratorBase::max_integration_distance_m(
+    float max_integration_distance_m) {
+  CHECK_GT(max_integration_distance_m, 0.0f);
+  max_integration_distance_m_ = max_integration_distance_m;
+}
+
+float ProjectiveIntegratorBase::truncation_distance_m(float block_size) const {
+  return truncation_distance_vox_ * block_size /
+         VoxelBlock<TsdfVoxel>::kVoxelsPerSide;
+}
+
+std::vector<Index3D> ProjectiveIntegratorBase::getBlocksInView(
+    const DepthImage& depth_frame, const Transform& T_L_C,
+    const Camera& camera, const float block_size) const {
+  return FrustumCalculator::getBlocksInImageView(
+      depth_frame, T_L_C, camera, block_size,
+      truncation_distance_m(block_size), max_integration_distance_m_);
+}
+
+std::vector<Index3D> ProjectiveIntegratorBase::getBlocksInView(
+    const Transform& T_L_C, const Camera& camera,
+    const float block_size) const {
+  return frustum_calculator_.getBlocksInView(
+      T_L_C, camera, block_size,
+      max_integration_distance_m_ + truncation_distance_m(block_size));
+}
+
+std::vector<Index3D> ProjectiveIntegratorBase::getBlocksInViewUsingRaycasting(
+    const DepthImage& depth_frame, const Transform& T_L_C,
+    const Camera& camera, const float block_size) const {
+  return frustum_calculator_.getBlocksInImageViewCuda(
+      depth_frame, T_L_C, camera, block_size,
+      truncation_distance_m(block_size), max_integration_distance_m_);
+}
+
+}  // namespace nvblox
diff --git a/nvblox/src/io/csv.cpp b/nvblox/src/io/csv.cpp
new file mode 100644
index 00000000..d3a45fcc
--- /dev/null
+++ b/nvblox/src/io/csv.cpp
@@ -0,0 +1,79 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "nvblox/io/csv.h"
+
+#include <fstream>
+#include <glog/logging.h>
+
+namespace nvblox {
+namespace io {
+
+void writeToCsv(const std::string& filepath, const DepthImage& frame) {
+  LOG(INFO) << "Writing DepthImage to: " << filepath;
+
+  // Create a temporary image in unified memory if the image resides in device
+  // memory.
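+  // (Plain device memory is not dereferenceable from host code, so we stage
+  // a unified-memory copy that the CPU write loop below can read directly.)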
+  const DepthImage* write_frame_ptr;
+  std::unique_ptr<DepthImage> tmp;
+  if (frame.memory_type() == MemoryType::kDevice) {
+    tmp = std::make_unique<DepthImage>(frame, MemoryType::kUnified);
+    write_frame_ptr = tmp.get();
+  } else {
+    write_frame_ptr = &frame;
+  }
+
+  std::ofstream file_stream(filepath, std::ofstream::out);
+  for (int row_idx = 0; row_idx < write_frame_ptr->rows(); row_idx++) {
+    for (int col_idx = 0; col_idx < write_frame_ptr->cols(); col_idx++) {
+      file_stream << (*write_frame_ptr)(row_idx, col_idx) << " ";
+    }
+    file_stream << "\n";
+  }
+  file_stream.close();
+}
+
+void writeToCsv(const std::string& filepath, const ColorImage& frame) {
+  LOG(INFO) << "Writing ColorImage to: " << filepath;
+
+  // Create a temporary image in unified memory if the image resides in device
+  // memory.
+  const ColorImage* write_frame_ptr;
+  std::unique_ptr<ColorImage> tmp;
+  if (frame.memory_type() == MemoryType::kDevice) {
+    tmp = std::make_unique<ColorImage>(frame, MemoryType::kUnified);
+    write_frame_ptr = tmp.get();
+  } else {
+    write_frame_ptr = &frame;
+  }
+
+  std::ofstream file_stream(filepath, std::ofstream::out);
+  for (int row_idx = 0; row_idx < write_frame_ptr->rows(); row_idx++) {
+    for (int col_idx = 0; col_idx < write_frame_ptr->cols(); col_idx++) {
+      file_stream << std::to_string((*write_frame_ptr)(row_idx, col_idx).r)
+                  << " "
+                  << std::to_string((*write_frame_ptr)(row_idx, col_idx).g)
+                  << " "
+                  << std::to_string((*write_frame_ptr)(row_idx, col_idx).b)
+                  << " ";
+    }
+    // Just write one long line per row and let the loader sort it out.
+    file_stream << "\n";
+  }
+  file_stream.close();
+}
+
+}  // namespace io
+}  // namespace nvblox
\ No newline at end of file
diff --git a/nvblox/src/io/mesh_io.cpp b/nvblox/src/io/mesh_io.cpp
new file mode 100644
index 00000000..57132199
--- /dev/null
+++ b/nvblox/src/io/mesh_io.cpp
@@ -0,0 +1,45 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "nvblox/io/mesh_io.h"
+
+#include <string>
+
+#include "nvblox/io/ply_writer.h"
+#include "nvblox/mesh/mesh.h"
+
+namespace nvblox {
+namespace io {
+
+bool outputMeshLayerToPly(const BlockLayer<MeshBlock>& layer,
+                          const std::string& filename) {
+  // TODO: intensity is not supported yet.
+  const Mesh mesh = Mesh::fromLayer(layer);
+
+  // Create the ply writer object
+  io::PlyWriter writer(filename);
+  writer.setPoints(&mesh.vertices);
+  writer.setTriangles(&mesh.triangles);
+  if (mesh.normals.size() > 0) {
+    writer.setNormals(&mesh.normals);
+  }
+  if (mesh.colors.size() > 0) {
+    writer.setColors(&mesh.colors);
+  }
+  return writer.write();
+}
+
+}  // namespace io
+}  // namespace nvblox
diff --git a/nvblox/src/io/ply_writer.cpp b/nvblox/src/io/ply_writer.cpp
new file mode 100644
index 00000000..43529b2d
--- /dev/null
+++ b/nvblox/src/io/ply_writer.cpp
@@ -0,0 +1,137 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include + +#include "nvblox/io/ply_writer.h" + +namespace nvblox { +namespace io { + +bool PlyWriter::write() { + if (!file_) { + // Output a warning -- couldn't open file? + LOG(WARNING) << "Could not open file for PLY output."; + return false; + } + if (points_ == nullptr || points_->size() == 0) { + LOG(ERROR) << "No points added, nothing to output."; + return false; + } + + // Double-check that the rest of the sizes match. + if (intensities_ != nullptr && points_->size() != intensities_->size()) { + LOG(ERROR) << "Intensity size does not match point size."; + return false; + } + + if (colors_ != nullptr && points_->size() != colors_->size()) { + LOG(ERROR) << "Color size does not match point size."; + return false; + } + if (normals_ != nullptr && points_->size() != normals_->size()) { + LOG(ERROR) << "Normal size does not match point size."; + return false; + } + // Triangles can be whatever size they like. + + // Output the header first. + writeHeader(); + + // Then output all the points. + writePoints(); + + // Then triangles. + writeTriangles(); + + return true; +} + +void PlyWriter::writeHeader() { + file_ << "ply" << std::endl; + file_ << "format ascii 1.0" << std::endl; + file_ << "element vertex " << points_->size() << std::endl; + file_ << "property float x" << std::endl; + file_ << "property float y" << std::endl; + file_ << "property float z" << std::endl; + + if (normals_) { + // TODO: should this be normal_x or nx? + file_ << "property float nx" << std::endl; + file_ << "property float ny" << std::endl; + file_ << "property float nz" << std::endl; + } + + if (intensities_) { + file_ << "property float intensity" << std::endl; + } + + if (colors_) { + file_ << "property uchar red" << std::endl; + file_ << "property uchar green" << std::endl; + file_ << "property uchar blue" << std::endl; + } + + if (triangles_) { + // TODO: check if "vertex_index" or "vertex_indices" + file_ << "element face " << triangles_->size() / 3 << std::endl; + file_ << "property list uchar int vertex_indices" + << std::endl; // pcl-1.7(ros::kinetic) breaks ply convention by not + // using "vertex_index" + } + file_ << "end_header" << std::endl; +} + +void PlyWriter::writePoints() { + // We've already checked that all the ones that exist have a matching size. 
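+  // Example row for a cloud with normals and colors enabled:
+  //   "0.1 0.2 0.3 0.0 0.0 1.0 255 0 0"
+  // following the property order declared in writeHeader() above.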
+ for (size_t i = 0; i < points_->size(); i++) { + const Vector3f vertex = (*points_)[i]; + file_ << vertex.x() << " " << vertex.y() << " " << vertex.z(); + + if (normals_) { + const Vector3f& normal = (*normals_)[i]; + file_ << " " << normal.x() << " " << normal.y() << " " << normal.z(); + } + + if (intensities_) { + file_ << " " << (*intensities_)[i]; + } + + if (colors_) { + const Color& color = (*colors_)[i]; + file_ << " " << std::to_string(color.r) << " " << std::to_string(color.g) + << " " << std::to_string(color.b); + } + file_ << std::endl; + } +} + +void PlyWriter::writeTriangles() { + constexpr int kTriangleSize = 3; + if (triangles_) { + for (size_t i = 0; i < triangles_->size(); i += kTriangleSize) { + file_ << kTriangleSize << " "; + + for (int j = 0; j < kTriangleSize; j++) { + file_ << (*triangles_)[i + j] << " "; + } + + file_ << std::endl; + } + } +} + +} // namespace io +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/src/mesh/marching_cubes.cu b/nvblox/src/mesh/marching_cubes.cu new file mode 100644 index 00000000..7b73c49d --- /dev/null +++ b/nvblox/src/mesh/marching_cubes.cu @@ -0,0 +1,189 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/core/types.h" +#include "nvblox/mesh/impl/marching_cubes_table.h" + +#include "nvblox/mesh/marching_cubes.h" + +namespace nvblox { +namespace marching_cubes { + +int calculateVertexConfiguration(const float vertex_sdf[8]) { + return (vertex_sdf[0] < 0 ? (1 << 0) : 0) | + (vertex_sdf[1] < 0 ? (1 << 1) : 0) | + (vertex_sdf[2] < 0 ? (1 << 2) : 0) | + (vertex_sdf[3] < 0 ? (1 << 3) : 0) | + (vertex_sdf[4] < 0 ? (1 << 4) : 0) | + (vertex_sdf[5] < 0 ? (1 << 5) : 0) | + (vertex_sdf[6] < 0 ? (1 << 6) : 0) | + (vertex_sdf[7] < 0 ? (1 << 7) : 0); +} + +int neighborIndexFromDirection(const Index3D& direction) { + return (direction.x() << 2 | direction.y() << 1 | direction.z() << 0); +} + +Index3D directionFromNeighborIndex(const int index) { + return Index3D((index & (1 << 2)) >> 2, (index & (1 << 1)) >> 1, + (index & (1 << 0)) >> 0); +} + +/// Performs linear interpolation on two cube corners to find the approximate +/// zero crossing (surface) value. +Vector3f interpolateVertex(const Vector3f& vertex1, const Vector3f& vertex2, + float sdf1, float sdf2) { + constexpr float kMinSdfDifference = 1e-4; + const float sdf_diff = sdf1 - sdf2; + // Only compute the actual interpolation value if the sdf_difference is not + // too small, this is to counteract issues with floating point precision. 
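+  // Worked example: sdf1 = 0.02 and sdf2 = -0.06 give sdf_diff = 0.08 and
+  // t = 0.25, so the zero crossing lands a quarter of the way from vertex1
+  // to vertex2.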
+ if (abs(sdf_diff) >= kMinSdfDifference) { + const float t = sdf1 / sdf_diff; + return Vector3f(vertex1 + t * (vertex2 - vertex1)); + } else { + return Vector3f(0.5 * (vertex1 + vertex2)); + } +} + +void interpolateEdgeVertices( + const PerVoxelMarchingCubesResults& marching_cubes_results, + Eigen::Matrix* edge_coords) { + CHECK(edge_coords != NULL); + for (size_t i = 0; i < 12; ++i) { + const uint8_t* pairs = kEdgeIndexPairs[i]; + const int edge0 = pairs[0]; + const int edge1 = pairs[1]; + // Only interpolate along edges where there is a zero crossing. + if ((marching_cubes_results.vertex_sdf[edge0] < 0 && + marching_cubes_results.vertex_sdf[edge1] >= 0) || + (marching_cubes_results.vertex_sdf[edge0] >= 0 && + marching_cubes_results.vertex_sdf[edge1] < 0)) + edge_coords->col(i) = + interpolateVertex(marching_cubes_results.vertex_coords[edge0], + marching_cubes_results.vertex_coords[edge1], + marching_cubes_results.vertex_sdf[edge0], + marching_cubes_results.vertex_sdf[edge1]); + } +} + +void meshCube(const PerVoxelMarchingCubesResults& marching_cubes_results, + MeshBlock* mesh) { + CHECK_NOTNULL(mesh); + const int table_index = marching_cubes_results.marching_cubes_table_index; + + // No edges in this cube. + if (table_index == 0 || table_index == 255) { + return; + } + + Eigen::Matrix edge_vertex_coordinates; + interpolateEdgeVertices(marching_cubes_results, &edge_vertex_coordinates); + + const int8_t* table_row = kTriangleTable[table_index]; + + int table_col = 0; + int next_index = mesh->vertices.size(); + + // Resize the mesh block to contain all the vertices. + int num_triangles = kNumTrianglesTable[table_index]; + + table_col = 0; + for (int i = 0; i < num_triangles; i++) { + mesh->vertices.push_back( + edge_vertex_coordinates.col(table_row[table_col + 2])); + mesh->vertices.push_back( + edge_vertex_coordinates.col(table_row[table_col + 1])); + mesh->vertices.push_back(edge_vertex_coordinates.col(table_row[table_col])); + mesh->triangles.push_back(next_index); + mesh->triangles.push_back(next_index + 1); + mesh->triangles.push_back(next_index + 2); + const Vector3f& p0 = mesh->vertices[next_index]; + const Vector3f& p1 = mesh->vertices[next_index + 1]; + const Vector3f& p2 = mesh->vertices[next_index + 2]; + Vector3f px = (p1 - p0); + Vector3f py = (p2 - p0); + Vector3f n = px.cross(py).normalized(); + mesh->normals.push_back(n); + mesh->normals.push_back(n); + mesh->normals.push_back(n); + next_index += 3; + table_col += 3; + } +} + +__device__ void calculateOutputIndex( + PerVoxelMarchingCubesResults* marching_cubes_results, int* size) { + // How many vertices in this voxel + const uint8_t table_index = + marching_cubes_results->marching_cubes_table_index; + const int num_vertices_in_voxel = kNumVertsTable[table_index]; + + // No edges in this cube. + if (num_vertices_in_voxel == 0) { + return; + } + + // Calculate: + // - the start index where this voxel starts outputing, and + // - the total number of vertices in this mesh block (once all threads + // finish). + marching_cubes_results->vertex_vector_start_index = + atomicAdd(size, num_vertices_in_voxel); +} + +__device__ void calculateVertices( + const PerVoxelMarchingCubesResults& marching_cubes_results, + CudaMeshBlock* mesh) { + const uint8_t table_index = marching_cubes_results.marching_cubes_table_index; + const int num_triangles_in_voxel = kNumTrianglesTable[table_index]; + + if (num_triangles_in_voxel == 0) { + return; + } + + // The position in the block that we start output for this voxel. 
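+  // calculateOutputIndex() above reserved this voxel's output slots with an
+  // atomicAdd; e.g. if two voxels in a block emit 6 and 3 vertices, one
+  // thread gets start index 0 and the other 6 (in whichever order the
+  // atomics resolve), so the writes below never overlap.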
+  int next_index = marching_cubes_results.vertex_vector_start_index;
+
+  Eigen::Matrix<float, 3, 12> edge_vertex_coordinates;
+  interpolateEdgeVertices(marching_cubes_results, &edge_vertex_coordinates);
+
+  const int8_t* table_row = kTriangleTable[table_index];
+  int table_col = 0;
+  for (int i = 0; i < num_triangles_in_voxel; i++) {
+    mesh->vertices[next_index] =
+        edge_vertex_coordinates.col(table_row[table_col + 2]);
+    mesh->vertices[next_index + 1] =
+        edge_vertex_coordinates.col(table_row[table_col + 1]);
+    mesh->vertices[next_index + 2] =
+        edge_vertex_coordinates.col(table_row[table_col]);
+    mesh->triangles[next_index] = next_index;
+    mesh->triangles[next_index + 1] = next_index + 1;
+    mesh->triangles[next_index + 2] = next_index + 2;
+    const Vector3f& p0 = mesh->vertices[next_index];
+    const Vector3f& p1 = mesh->vertices[next_index + 1];
+    const Vector3f& p2 = mesh->vertices[next_index + 2];
+    Vector3f px = (p1 - p0);
+    Vector3f py = (p2 - p0);
+    Vector3f n = px.cross(py).normalized();
+    mesh->normals[next_index] = n;
+    mesh->normals[next_index + 1] = n;
+    mesh->normals[next_index + 2] = n;
+    next_index += 3;
+    table_col += 3;
+  }
+}
+
+}  // namespace marching_cubes
+}  // namespace nvblox
diff --git a/nvblox/src/mesh/mesh.cpp b/nvblox/src/mesh/mesh.cpp
new file mode 100644
index 00000000..dab4a45b
--- /dev/null
+++ b/nvblox/src/mesh/mesh.cpp
@@ -0,0 +1,69 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <algorithm>
+
+#include "nvblox/mesh/mesh.h"
+
+namespace nvblox {
+
+Mesh Mesh::fromLayer(const BlockLayer<MeshBlock>& layer) {
+  Mesh mesh;
+
+  // Keep track of the vertex index.
+  int next_index = 0;
+
+  // Iterate over every block in the layer.
+  const std::vector<Index3D> indices = layer.getAllBlockIndices();
+
+  for (const Index3D& index : indices) {
+    MeshBlock::ConstPtr block = layer.getBlockAtIndex(index);
+
+    // Copy over.
+    const std::vector<Vector3f> vertices = block->getVertexVectorOnCPU();
+    mesh.vertices.resize(mesh.vertices.size() + vertices.size());
+    std::copy(vertices.begin(), vertices.end(),
+              mesh.vertices.begin() + next_index);
+
+    const std::vector<Vector3f> normals = block->getNormalVectorOnCPU();
+    mesh.normals.resize(mesh.normals.size() + normals.size());
+    std::copy(normals.begin(), normals.end(),
+              mesh.normals.begin() + next_index);
+
+    const std::vector<Color> colors = block->getColorVectorOnCPU();
+    mesh.colors.resize(mesh.colors.size() + colors.size());
+    std::copy(colors.begin(), colors.end(), mesh.colors.begin() + next_index);
+
+    // Our simple mesh implementation has:
+    // - per vertex colors
+    // - per vertex normals
+    CHECK((vertices.size() == normals.size()) || (normals.size() == 0));
+    CHECK((vertices.size() == colors.size()) || (colors.size() == 0));
+
+    // Copy over the triangles.
+    const std::vector<int> triangles = block->getTriangleVectorOnCPU();
+    std::vector<int> triangle_indices(triangles.size());
+    // Increment all triangle indices by this block's starting vertex index.
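+    // E.g. a block-local triangle list {0, 1, 2, 0, 2, 3} appended when
+    // next_index == 100 becomes {100, 101, 102, 100, 102, 103} in the
+    // combined mesh.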
+    std::transform(triangles.begin(), triangles.end(),
+                   triangle_indices.begin(),
+                   [next_index](int t) { return t + next_index; });
+
+    mesh.triangles.insert(mesh.triangles.end(), triangle_indices.begin(),
+                          triangle_indices.end());
+
+    next_index += vertices.size();
+  }
+
+  return mesh;
+}
+
+}  // namespace nvblox
\ No newline at end of file
diff --git a/nvblox/src/mesh/mesh_block.cu b/nvblox/src/mesh/mesh_block.cu
new file mode 100644
index 00000000..bb1f1234
--- /dev/null
+++ b/nvblox/src/mesh/mesh_block.cu
@@ -0,0 +1,90 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "nvblox/mesh/mesh_block.h"
+
+namespace nvblox {
+
+MeshBlock::MeshBlock(MemoryType memory_type)
+    : vertices(memory_type),
+      normals(memory_type),
+      triangles(memory_type),
+      colors(memory_type) {}
+
+void MeshBlock::clear() {
+  vertices.resize(0);
+  normals.resize(0);
+  triangles.resize(0);
+  colors.resize(0);
+}
+
+void MeshBlock::resizeToNumberOfVertices(size_t new_size) {
+  vertices.resize(new_size);
+  normals.resize(new_size);
+  triangles.resize(new_size);
+}
+
+void MeshBlock::reserveNumberOfVertices(size_t new_capacity) {
+  vertices.reserve(new_capacity);
+  normals.reserve(new_capacity);
+  triangles.reserve(new_capacity);
+}
+
+MeshBlock::Ptr MeshBlock::allocate(MemoryType memory_type) {
+  return std::make_shared<MeshBlock>(memory_type);
+}
+
+std::vector<Vector3f> MeshBlock::getVertexVectorOnCPU() const {
+  return vertices.toVector();
+}
+
+std::vector<Vector3f> MeshBlock::getNormalVectorOnCPU() const {
+  return normals.toVector();
+}
+
+std::vector<int> MeshBlock::getTriangleVectorOnCPU() const {
+  return triangles.toVector();
+}
+
+std::vector<Color> MeshBlock::getColorVectorOnCPU() const {
+  return colors.toVector();
+}
+
+size_t MeshBlock::size() const { return vertices.size(); }
+
+size_t MeshBlock::capacity() const { return vertices.capacity(); }
+
+void MeshBlock::expandColorsToMatchVertices() {
+  colors.reserve(vertices.capacity());
+  colors.resize(vertices.size());
+}
+
+void MeshBlock::expandIntensitiesToMatchVertices() {
+  intensities.reserve(vertices.capacity());
+  intensities.resize(vertices.size());
+}
+
+// Set the pointers to point into the mesh block's buffers.
+CudaMeshBlock::CudaMeshBlock(MeshBlock* block) {
+  CHECK_NOTNULL(block);
+  vertices = block->vertices.data();
+  normals = block->normals.data();
+  triangles = block->triangles.data();
+  colors = block->colors.data();
+
+  size = block->vertices.size();
+}
+
+}  // namespace nvblox
\ No newline at end of file
diff --git a/nvblox/src/mesh/mesh_integrator.cpp b/nvblox/src/mesh/mesh_integrator.cpp
new file mode 100644
index 00000000..e97f3cc6
--- /dev/null
+++ b/nvblox/src/mesh/mesh_integrator.cpp
@@ -0,0 +1,221 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/utils/timing.h" + +#include "nvblox/mesh/mesh_integrator.h" + +namespace nvblox { + +MeshIntegrator::MeshIntegrator() { + // clang-format off + cube_index_offsets_ << 0, 1, 1, 0, 0, 1, 1, 0, + 0, 0, 1, 1, 0, 0, 1, 1, + 0, 0, 0, 0, 1, 1, 1, 1; + // clang-format on +} + +bool MeshIntegrator::integrateMeshFromDistanceField( + const TsdfLayer& distance_layer, BlockLayer* mesh_layer, + const DeviceType device_type) { + // First, get all the blocks. + std::vector block_indices = distance_layer.getAllBlockIndices(); + if (device_type == DeviceType::kCPU) { + return integrateBlocksCPU(distance_layer, block_indices, mesh_layer); + } else { + return integrateBlocksGPU(distance_layer, block_indices, mesh_layer); + } +} + +bool MeshIntegrator::integrateBlocksCPU( + const TsdfLayer& distance_layer, const std::vector& block_indices, + BlockLayer* mesh_layer) { + timing::Timer mesh_timer("mesh/integrate"); + CHECK_NOTNULL(mesh_layer); + CHECK_NEAR(distance_layer.block_size(), mesh_layer->block_size(), 1e-4); + + // Figure out which of these actually contain something worth meshing. + const float block_size = distance_layer.block_size(); + const float voxel_size = distance_layer.voxel_size(); + + // For each block... + for (const Index3D& block_index : block_indices) { + // Get the block. + VoxelBlock::ConstPtr block = + distance_layer.getBlockAtIndex(block_index); + + // Check meshability - basically if this contains anything near the + // border. + if (!isBlockMeshable(block, voxel_size * 2)) { + continue; + } + + // Get all the neighbor blocks. + std::vector::ConstPtr> neighbor_blocks(8); + for (int i = 0; i < 8; i++) { + Index3D neighbor_index = + block_index + marching_cubes::directionFromNeighborIndex(i); + neighbor_blocks[i] = distance_layer.getBlockAtIndex(neighbor_index); + } + + // Get all the potential triangles: + std::vector + triangle_candidates; + getTriangleCandidatesInBlock(block, neighbor_blocks, block_index, + block_size, &triangle_candidates); + + if (triangle_candidates.size() <= 0) { + continue; + } + + // Allocate the mesh block. + MeshBlock::Ptr mesh_block = mesh_layer->allocateBlockAtIndex(block_index); + + // Then actually calculate the triangles. + for (const marching_cubes::PerVoxelMarchingCubesResults& candidate : + triangle_candidates) { + marching_cubes::meshCube(candidate, mesh_block.get()); + } + } + + return true; +} + +bool MeshIntegrator::isBlockMeshable( + const VoxelBlock::ConstPtr block, float cutoff) const { + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + + Index3D voxel_index; + // Iterate over all the voxels: + for (voxel_index.x() = 0; voxel_index.x() < kVoxelsPerSide; + voxel_index.x()++) { + for (voxel_index.y() = 0; voxel_index.y() < kVoxelsPerSide; + voxel_index.y()++) { + for (voxel_index.z() = 0; voxel_index.z() < kVoxelsPerSide; + voxel_index.z()++) { + const TsdfVoxel* voxel = + &block->voxels[voxel_index.x()][voxel_index.y()][voxel_index.z()]; + + // Check if voxel distance is within the cutoff to determine if + // there's going to be a surface boundary in this block. 
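+        // Example (hypothetical sizes): with a 0.05 m voxel the CPU path
+        // passes cutoff = 2 * 0.05 = 0.10 m, so a block is considered
+        // meshable as soon as one well-observed voxel lies within 0.10 m of
+        // the surface.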
+ if (voxel->weight >= min_weight_ && + std::abs(voxel->distance) <= cutoff) { + return true; + } + } + } + } + return false; +} + +void MeshIntegrator::getTriangleCandidatesInBlock( + const TsdfBlock::ConstPtr block, + const std::vector& neighbor_blocks, + const Index3D& block_index, const float block_size, + std::vector* + triangle_candidates) { + CHECK_NOTNULL(block); + CHECK_NOTNULL(triangle_candidates); + + constexpr int kVoxelsPerSide = TsdfBlock::kVoxelsPerSide; + const float voxel_size = block_size / kVoxelsPerSide; + + Index3D voxel_index; + // Iterate over all inside voxels. + for (voxel_index.x() = 0; voxel_index.x() < kVoxelsPerSide; + voxel_index.x()++) { + for (voxel_index.y() = 0; voxel_index.y() < kVoxelsPerSide; + voxel_index.y()++) { + for (voxel_index.z() = 0; voxel_index.z() < kVoxelsPerSide; + voxel_index.z()++) { + // Get the position of this voxel. + Vector3f voxel_position = getCenterPostionFromBlockIndexAndVoxelIndex( + block_size, block_index, voxel_index); + + marching_cubes::PerVoxelMarchingCubesResults neighbors; + // Figure out if this voxel is actually a triangle candidate. + if (getTriangleCandidatesAroundVoxel(block, neighbor_blocks, + voxel_index, voxel_position, + voxel_size, &neighbors)) { + triangle_candidates->push_back(neighbors); + } + } + } + } +} + +bool MeshIntegrator::getTriangleCandidatesAroundVoxel( + const TsdfBlock::ConstPtr block, + const std::vector::ConstPtr>& neighbor_blocks, + const Index3D& voxel_index, const Vector3f& voxel_position, + const float voxel_size, + marching_cubes::PerVoxelMarchingCubesResults* neighbors) { + DCHECK_EQ(neighbor_blocks.size(), 8); + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + for (unsigned int i = 0; i < 8; ++i) { + Index3D corner_index = voxel_index + cube_index_offsets_.col(i); + + // Are we in bounds? If not, have to get a neighbor. + // The neighbor should correspond to the index in neighbor blocks. + + bool search_neighbor = false; + Index3D block_offset(0, 0, 0); + for (int j = 0; j < 3; j++) { + if (corner_index[j] >= kVoxelsPerSide) { + // Here the index is too much. + corner_index(j) -= kVoxelsPerSide; + block_offset(j) = 1; + search_neighbor = true; + } + } + + const TsdfVoxel* voxel = nullptr; + // Get the voxel either from the current block or from the corresponding + // neighbor. + if (search_neighbor) { + // Find the correct neighbor block. + int neighbor_index = + marching_cubes::neighborIndexFromDirection(block_offset); + if (neighbor_blocks[neighbor_index] == nullptr) { + return false; + } + voxel = + &neighbor_blocks[neighbor_index] + ->voxels[corner_index.x()][corner_index.y()][corner_index.z()]; + } else { + voxel = + &block->voxels[corner_index.x()][corner_index.y()][corner_index.z()]; + } + + // If any of the neighbors are not observed, this can't be a mesh + // triangle. + if (voxel->weight < min_weight_) { + return false; + } + neighbors->vertex_sdf[i] = voxel->distance; + neighbors->vertex_coords[i] = + voxel_position + voxel_size * cube_index_offsets_.col(i).cast(); + } + + // Figure out the index if we've made it this far. + neighbors->marching_cubes_table_index = + marching_cubes::calculateVertexConfiguration(neighbors->vertex_sdf); + + // Index 0 & 255 contain no triangles so not worth outputting it. 
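+  // Configuration 0 means all eight corner SDFs are non-negative (cube
+  // entirely outside the surface) and 255 means all are negative (entirely
+  // inside); neither contains a zero crossing, so both are skipped.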
+ return neighbors->marching_cubes_table_index != 0 && + neighbors->marching_cubes_table_index != 255; +} + +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/src/mesh/mesh_integrator.cu b/nvblox/src/mesh/mesh_integrator.cu new file mode 100644 index 00000000..0135a80b --- /dev/null +++ b/nvblox/src/mesh/mesh_integrator.cu @@ -0,0 +1,480 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include + +#include +#include +#include +#include +#include +#include + +#include "nvblox/core/accessors.h" +#include "nvblox/core/common_names.h" +#include "nvblox/integrators/integrators_common.h" +#include "nvblox/mesh/impl/marching_cubes_table.h" +#include "nvblox/mesh/marching_cubes.h" +#include "nvblox/mesh/mesh_integrator.h" +#include "nvblox/utils/timing.h" + +namespace nvblox { + +MeshIntegrator::~MeshIntegrator() { + if (cuda_stream_ != nullptr) { + cudaStreamDestroy(cuda_stream_); + } +} + +bool MeshIntegrator::integrateBlocksGPU( + const TsdfLayer& distance_layer, const std::vector& block_indices, + BlockLayer* mesh_layer) { + timing::Timer mesh_timer("mesh/gpu/integrate"); + CHECK_NOTNULL(mesh_layer); + CHECK_NEAR(distance_layer.block_size(), mesh_layer->block_size(), 1e-4); + if (block_indices.empty()) { + return true; + } + + // Initialize the stream if not done yet. + if (cuda_stream_ == nullptr) { + checkCudaErrors(cudaStreamCreate(&cuda_stream_)); + } + + // Figure out which of these actually contain something worth meshing. + float block_size = distance_layer.block_size(); + float voxel_size = distance_layer.voxel_size(); + + // Clear all blocks if they exist. + for (const Index3D& block_index : block_indices) { + MeshBlock::Ptr mesh_block = mesh_layer->getBlockAtIndex(block_index); + if (mesh_block) { + mesh_block->clear(); + } + } + + // First create a list of meshable blocks. + std::vector meshable_blocks; + timing::Timer meshable_timer("mesh/gpu/get_meshable"); + getMeshableBlocksGPU(distance_layer, block_indices, 5 * voxel_size, + &meshable_blocks); + meshable_timer.Stop(); + + // Then get all the candidates and mesh each block. + timing::Timer mesh_blocks_timer("mesh/gpu/mesh_blocks"); + + meshBlocksGPU(distance_layer, meshable_blocks, mesh_layer); + + // TODO: optionally weld here as well. + mesh_blocks_timer.Stop(); + + return true; +} + +// Kernels + +// Takes in a vector of blocks, and outputs an integer true if that block is +// meshable. +// Block size MUST be voxels_per_side x voxels_per_side x voxel_per_size. +// Grid size can be anything. +__global__ void isBlockMeshableKernel(int num_blocks, + const VoxelBlock** blocks, + float cutoff_distance, float min_weight, + bool* meshable) { + dim3 voxel_index = threadIdx; + // This for loop allows us to have fewer threadblocks than there are + // blocks in this computation. We assume the threadblock size is constant + // though to make our lives easier. + for (int block_index = blockIdx.x; block_index < num_blocks; + block_index += gridDim.x) { + // Get the correct voxel for this index. 
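+    // One thread per voxel: the wrapper launches this kernel with a
+    // kVoxelsPerSide^3 thread block (512 threads for the usual 8 voxels per
+    // side), so threadIdx addresses exactly one voxel of each block.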
+ const TsdfVoxel& voxel = + blocks[block_index] + ->voxels[voxel_index.z][voxel_index.y][voxel_index.x]; + if (fabs(voxel.distance) <= cutoff_distance && voxel.weight >= min_weight) { + meshable[block_index] = true; + } + } +} + +// Takes in a set of blocks arranged in neighbor sets and their relative +// positions, then finds vertex candidates, and finally creates the output +// meshes for them. +// Block size MUST be voxels_per_side x voxels_per_side x voxel_per_size. +// Grid size can be anything. +__global__ void meshBlocksCalculateTableIndicesKernel( + int num_blocks, const VoxelBlock** blocks, + const Vector3f* block_positions, float voxel_size, float min_weight, + marching_cubes::PerVoxelMarchingCubesResults* marching_cubes_results, + int* mesh_block_sizes) { + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + constexpr int kVoxelsPerBlock = + kVoxelsPerSide * kVoxelsPerSide * kVoxelsPerSide; + constexpr int kCubeNeighbors = 8; + + const dim3 voxel_index = dim3(threadIdx.z, threadIdx.y, threadIdx.x); + + const int linear_thread_idx = + threadIdx.x + + kVoxelsPerSide * (threadIdx.y + kVoxelsPerSide * threadIdx.z); + + // Preallocate a half voxel size. + const Vector3f half_voxel(0.5f, 0.5f, 0.5f); + + marching_cubes::PerVoxelMarchingCubesResults marching_cubes_results_local; + + // This for loop allows us to have fewer threadblocks than there are + // blocks in this computation. We assume the threadblock size is constant + // though to make our lives easier. + for (int block_index = blockIdx.x; block_index < num_blocks; + block_index += gridDim.x) { + // Initialize the calculated output size for this block. + __shared__ int mesh_block_size; + if (linear_thread_idx == 0) { + mesh_block_size = 0; + } + __syncthreads(); + + // Getting the block pointer is complicated now so let's just get it. + const VoxelBlock* block = blocks[block_index * kCubeNeighbors]; + + // Get the linear index of the this voxel in this block + const int vertex_neighbor_idx = + block_index * kVoxelsPerBlock + linear_thread_idx; + + // Check all 8 neighbors. + bool skip_voxel = false; + for (unsigned int i = 0; i < 8; ++i) { + Index3D corner_index( + voxel_index.x + marching_cubes::kCornerIndexOffsets[i][0], + voxel_index.y + marching_cubes::kCornerIndexOffsets[i][1], + voxel_index.z + marching_cubes::kCornerIndexOffsets[i][2]); + Index3D block_offset(0, 0, 0); + bool search_neighbor = false; + // Are we in bounds? If not, have to get a neighbor. + // The neighbor should correspond to the index in neighbor blocks. + for (int j = 0; j < 3; j++) { + if (corner_index[j] >= kVoxelsPerSide) { + // Here the index is too much. + corner_index(j) -= kVoxelsPerSide; + block_offset(j) = 1; + search_neighbor = true; + } + } + + const TsdfVoxel* voxel = nullptr; + // Don't look for neighbors for now. + if (search_neighbor) { + int neighbor_index = + marching_cubes::neighborIndexFromDirection(block_offset); + const VoxelBlock* neighbor_block = + blocks[block_index * kCubeNeighbors + neighbor_index]; + if (neighbor_block == nullptr) { + skip_voxel = true; + break; + } + voxel = + &neighbor_block + ->voxels[corner_index.x()][corner_index.y()][corner_index.z()]; + } else { + voxel = + &block + ->voxels[corner_index.x()][corner_index.y()][corner_index.z()]; + } + // If any of the neighbors are not observed, this can't be a mesh + // triangle. + if (voxel->weight < min_weight) { + skip_voxel = true; + break; + } + + // Calculate the position of this voxel. 
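+      // Worked example (hypothetical 0.05 m voxels, block at the layer
+      // origin): a corner that wrapped from x = 8 to x = 0 with
+      // block_offset = (1, 0, 0) lands at x = 0.05 * (0 + 0.5 + 8) =
+      // 0.425 m, i.e. the first voxel center of the +x neighbor block.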
+      marching_cubes_results_local.vertex_sdf[i] = voxel->distance;
+      marching_cubes_results_local.vertex_coords[i] =
+          block_positions[block_index] +
+          voxel_size * (corner_index.cast<float>() + half_voxel +
+                        (kVoxelsPerSide * block_offset).cast<float>());
+    }
+
+    if (!skip_voxel) {
+      // If we've made it this far, this needs to be meshed.
+      marching_cubes_results_local.contains_mesh = true;
+
+      // Calculate the index into the magic marching cubes table
+      marching_cubes_results_local.marching_cubes_table_index =
+          marching_cubes::calculateVertexConfiguration(
+              marching_cubes_results_local.vertex_sdf);
+
+      // Mesh this cube. This will keep track of what index we're at within
+      // the cube.
+      marching_cubes::calculateOutputIndex(&marching_cubes_results_local,
+                                           &mesh_block_size);
+
+      // Write out to global memory
+      marching_cubes_results[vertex_neighbor_idx] =
+          marching_cubes_results_local;
+    }
+
+    // Writing the shared variable block size to global memory (per block)
+    __syncthreads();
+    if (linear_thread_idx == 0) {
+      mesh_block_sizes[block_index] = mesh_block_size;
+    }
+  }
+}
+
+__global__ void meshBlocksCalculateVerticesKernel(
+    int num_blocks,
+    const marching_cubes::PerVoxelMarchingCubesResults* marching_cubes_results,
+    const int* mesh_block_sizes, CudaMeshBlock* mesh_blocks) {
+  constexpr int kVoxelsPerSide = VoxelBlock<TsdfVoxel>::kVoxelsPerSide;
+
+  const int linear_thread_idx =
+      threadIdx.x +
+      kVoxelsPerSide * (threadIdx.y + kVoxelsPerSide * threadIdx.z);
+
+  // This for loop allows us to have fewer threadblocks than there are
+  // blocks in this computation. We assume the threadblock size is constant
+  // though to make our lives easier.
+  for (int block_index = blockIdx.x; block_index < num_blocks;
+       block_index += gridDim.x) {
+    // If this block contains a mesh
+    if (mesh_block_sizes[block_index] > 0) {
+      // Get the linear index of this voxel in this block
+      constexpr int kVoxelsPerBlock =
+          kVoxelsPerSide * kVoxelsPerSide * kVoxelsPerSide;
+      const int vertex_neighbor_idx =
+          block_index * kVoxelsPerBlock + linear_thread_idx;
+
+      // If this voxel contains a mesh
+      if (marching_cubes_results[vertex_neighbor_idx].contains_mesh) {
+        // Convert the marching cube table index into vertex coordinates
+        marching_cubes::calculateVertices(
+            marching_cubes_results[vertex_neighbor_idx],
+            &mesh_blocks[block_index]);
+      }
+    }
+  }
+}
+
+// Wrappers
+
+void MeshIntegrator::getMeshableBlocksGPU(
+    const TsdfLayer& distance_layer, const std::vector<Index3D>& block_indices,
+    float cutoff_distance, std::vector<Index3D>* meshable_blocks) {
+  CHECK_NOTNULL(meshable_blocks);
+  if (block_indices.size() == 0) {
+    return;
+  }
+
+  constexpr int kVoxelsPerSide = VoxelBlock<TsdfVoxel>::kVoxelsPerSide;
+  // One threadblock per block, 1 thread per voxel. :)
+  // Dim block can be smaller, but dim_threads must be the same.
+  int dim_block = block_indices.size();
+  dim3 dim_threads(kVoxelsPerSide, kVoxelsPerSide, kVoxelsPerSide);
+
+  // Collect all the meshable blocks as raw pointers.
+  // Get all the block pointers and positions.
+  block_ptrs_host_.resize(block_indices.size());
+
+  for (size_t i = 0; i < block_indices.size(); i++) {
+    block_ptrs_host_[i] =
+        distance_layer.getBlockAtIndex(block_indices[i]).get();
+  }
+
+  block_ptrs_device_ = block_ptrs_host_;
+
+  // Allocate a device vector that holds the meshable result.
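+  // (One flag per candidate block: the kernel raises a flag if any voxel in
+  // the block is near the surface, and the host compacts the flagged indices
+  // below.)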
+  meshable_device_.resize(block_indices.size());
+  meshable_device_.setZero();
+
+  checkCudaErrors(cudaPeekAtLastError());
+  isBlockMeshableKernel<<<dim_block, dim_threads, 0, cuda_stream_>>>(
+      block_indices.size(), block_ptrs_device_.data(), cutoff_distance,
+      min_weight_, meshable_device_.data());
+  checkCudaErrors(cudaPeekAtLastError());
+  checkCudaErrors(cudaStreamSynchronize(cuda_stream_));
+
+  meshable_host_ = meshable_device_;
+
+  for (size_t i = 0; i < block_indices.size(); i++) {
+    if (meshable_host_[i]) {
+      meshable_blocks->push_back(block_indices[i]);
+    }
+  }
+}
+
+void MeshIntegrator::meshBlocksGPU(const TsdfLayer& distance_layer,
+                                   const std::vector<Index3D>& block_indices,
+                                   BlockLayer<MeshBlock>* mesh_layer) {
+  if (block_indices.empty()) {
+    return;
+  }
+  timing::Timer mesh_prep_timer("mesh/gpu/mesh_blocks/prep");
+  constexpr int kVoxelsPerSide = VoxelBlock<TsdfVoxel>::kVoxelsPerSide;
+  constexpr int kCubeNeighbors = 8;
+
+  // One threadblock per block, 1 thread per voxel. :)
+  // Dim block can be smaller, but dim_threads must be the same.
+  int dim_block = block_indices.size();
+  dim3 dim_threads(kVoxelsPerSide, kVoxelsPerSide, kVoxelsPerSide);
+
+  // Get the block and voxel size.
+  const float block_size = distance_layer.block_size();
+  const float voxel_size = distance_layer.voxel_size();
+
+  // Get all the block pointers and positions.
+  // Block pointers are actually a 2D array that also holds the neighbor block
+  // pointers. The neighbors CAN be null so they need to be checked.
+  block_ptrs_host_.resize(block_indices.size() * kCubeNeighbors);
+  block_positions_host_.resize(block_indices.size());
+  for (size_t i = 0; i < block_indices.size(); i++) {
+    block_ptrs_host_[i * kCubeNeighbors] =
+        distance_layer.getBlockAtIndex(block_indices[i]).get();
+    for (size_t j = 1; j < kCubeNeighbors; j++) {
+      // Get the pointers to all the neighbors as well.
+      block_ptrs_host_[i * kCubeNeighbors + j] =
+          distance_layer
+              .getBlockAtIndex(block_indices[i] +
+                               marching_cubes::directionFromNeighborIndex(j))
+              .get();
+    }
+    block_positions_host_[i] =
+        getPositionFromBlockIndex(block_size, block_indices[i]);
+  }
+
+  // Create an output mesh blocks vector.
+  mesh_blocks_host_.resize(block_indices.size());
+
+  block_ptrs_device_ = block_ptrs_host_;
+  block_positions_device_ = block_positions_host_;
+
+  // Allocate working space
+  constexpr int kNumVoxelsPerBlock =
+      kVoxelsPerSide * kVoxelsPerSide * kVoxelsPerSide;
+  marching_cubes_results_device_.resize(block_indices.size() *
+                                        kNumVoxelsPerBlock);
+  marching_cubes_results_device_.setZero();
+  mesh_block_sizes_device_.resize(block_indices.size());
+  mesh_block_sizes_device_.setZero();
+  mesh_prep_timer.Stop();
+
+  // Run the first half of marching cubes and calculate:
+  // - the per-vertex indexes into the magic triangle table
+  // - the number of vertices in each mesh block.
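+  // (Sizing first means each MeshBlock can be allocated exactly once before
+  // the second kernel writes vertices into it.)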
+  timing::Timer mesh_kernel_1_timer("mesh/gpu/mesh_blocks/kernel_table");
+  meshBlocksCalculateTableIndicesKernel<<<dim_block, dim_threads, 0,
+                                          cuda_stream_>>>(
+      block_indices.size(), block_ptrs_device_.data(),
+      block_positions_device_.data(), voxel_size, min_weight_,
+      marching_cubes_results_device_.data(), mesh_block_sizes_device_.data());
+  checkCudaErrors(cudaPeekAtLastError());
+  checkCudaErrors(cudaStreamSynchronize(cuda_stream_));
+
+  mesh_kernel_1_timer.Stop();
+
+  // Copy back the new mesh block sizes (so we can allocate space)
+  timing::Timer mesh_copy_timer("mesh/gpu/mesh_blocks/copy_out");
+  mesh_block_sizes_host_ = mesh_block_sizes_device_;
+  mesh_copy_timer.Stop();
+
+  // Allocate mesh blocks
+  timing::Timer mesh_allocation_timer("mesh/gpu/mesh_blocks/block_allocation");
+  for (size_t i = 0; i < block_indices.size(); i++) {
+    const int num_vertices = mesh_block_sizes_host_[i];
+
+    if (num_vertices > 0) {
+      MeshBlock::Ptr output_block =
+          mesh_layer->allocateBlockAtIndex(block_indices[i]);
+
+      // Grow the vector with a growth factor and a minimum allocation to avoid
+      // repeated reallocation
+      if (num_vertices > output_block->capacity()) {
+        constexpr int kMinimumMeshBlockTrianglesPerVoxel = 1;
+        constexpr int kMinimumMeshBlockVertices =
+            kNumVoxelsPerBlock * kMinimumMeshBlockTrianglesPerVoxel * 3;
+        constexpr int kMeshBlockOverallocationFactor = 2;
+        const int num_vertices_to_allocate =
+            std::max(kMinimumMeshBlockVertices,
+                     num_vertices * kMeshBlockOverallocationFactor);
+        output_block->reserveNumberOfVertices(num_vertices_to_allocate);
+      }
+      output_block->resizeToNumberOfVertices(num_vertices);
+      mesh_blocks_host_[i] = CudaMeshBlock(output_block.get());
+    }
+  }
+  mesh_blocks_device_ = mesh_blocks_host_;
+  mesh_allocation_timer.Stop();
+
+  // Run the second half of marching cubes
+  // - Translating the magic table indices into triangle vertices and writing
+  //   them into the mesh layer.
+  timing::Timer mesh_kernel_2_timer("mesh/gpu/mesh_blocks/kernel_vertices");
+  meshBlocksCalculateVerticesKernel<<<dim_block, dim_threads, 0,
+                                      cuda_stream_>>>(
+      block_indices.size(), marching_cubes_results_device_.data(),
+      mesh_block_sizes_device_.data(), mesh_blocks_device_.data());
+  checkCudaErrors(cudaPeekAtLastError());
+  checkCudaErrors(cudaStreamSynchronize(cuda_stream_));
+  mesh_kernel_2_timer.Stop();
+
+  // Optional third stage: welding.
+  if (weld_vertices_) {
+    timing::Timer welding_timer("mesh/gpu/mesh_blocks/welding");
+    weldVertices(block_indices, mesh_layer);
+  }
+}
+
+void MeshIntegrator::weldVertices(const std::vector<Index3D>& block_indices,
+                                  BlockLayer<MeshBlock>* mesh_layer) {
+  for (const Index3D& index : block_indices) {
+    MeshBlock::Ptr mesh_block = mesh_layer->getBlockAtIndex(index);
+
+    if (!mesh_block || mesh_block->size() <= 3) {
+      continue;
+    }
+
+    // Store a copy of the input vertices.
+    input_vertices_ = mesh_block->vertices;
+    input_normals_ = mesh_block->normals;
+
+    // Sort vertices to bring duplicates together.
+    thrust::sort(thrust::device, mesh_block->vertices.begin(),
+                 mesh_block->vertices.end(), VectorCompare());
+
+    // Find unique vertices and erase redundancies. The iterator will point to
+    // the new last index.
+    auto iterator = thrust::unique(thrust::device,
+                                   mesh_block->vertices.begin(),
+                                   mesh_block->vertices.end());
+
+    // Figure out the new size.
+    size_t new_size = iterator - mesh_block->vertices.begin();
+    mesh_block->vertices.resize(new_size);
+    mesh_block->normals.resize(new_size);
+
+    // Find the indices of the original triangles.
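+    // (Each original vertex is searched for in the sorted, deduplicated
+    // vertex list; the position at which it is found becomes its index in
+    // the triangle index buffer.)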
+    thrust::lower_bound(thrust::device, mesh_block->vertices.begin(),
+                        mesh_block->vertices.end(), input_vertices_.begin(),
+                        input_vertices_.end(), mesh_block->triangles.begin(),
+                        VectorCompare());
+
+    // Reshuffle the normals to match.
+    thrust::scatter(thrust::device, input_normals_.begin(),
+                    input_normals_.end(), mesh_block->triangles.begin(),
+                    mesh_block->normals.begin());
+  }
+}
+
+}  // namespace nvblox
\ No newline at end of file
diff --git a/nvblox/src/mesh/mesh_integrator_color.cu b/nvblox/src/mesh/mesh_integrator_color.cu
new file mode 100644
index 00000000..162995d9
--- /dev/null
+++ b/nvblox/src/mesh/mesh_integrator_color.cu
@@ -0,0 +1,295 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <cuda_runtime.h>
+
+#include "nvblox/core/accessors.h"
+#include "nvblox/core/common_names.h"
+#include "nvblox/integrators/integrators_common.h"
+#include "nvblox/mesh/impl/marching_cubes_table.h"
+#include "nvblox/mesh/marching_cubes.h"
+#include "nvblox/mesh/mesh_integrator.h"
+#include "nvblox/utils/timing.h"
+
+namespace nvblox {
+
+void MeshIntegrator::colorMesh(const ColorLayer& color_layer,
+                               BlockLayer<MeshBlock>* mesh_layer) {
+  colorMesh(color_layer, mesh_layer->getAllBlockIndices(), mesh_layer);
+}
+void MeshIntegrator::colorMesh(const ColorLayer& color_layer,
+                               const std::vector<Index3D>& block_indices,
+                               BlockLayer<MeshBlock>* mesh_layer) {
+  // Default choice is GPU
+  colorMeshGPU(color_layer, block_indices, mesh_layer);
+}
+
+//
+/* Color Mesh blocks on the GPU
+ *
+ * Call with
+ * - one ThreadBlock per VoxelBlock, GridDim 1D
+ * - BlockDim 1D, any size: we implement a strided access pattern over
+ *   MeshBlock vertices
+ *
+ * @param: color_blocks: a list of color blocks which correspond in position
+ *         to mesh_blocks
+ * @param: block_indices: a list of block indices.
+ * @param: cuda_mesh_blocks: a list of mesh_blocks to be colored.
+ */
+__global__ void colorMeshBlockByClosestColorVoxel(
+    const ColorBlock** color_blocks, const Index3D* block_indices,
+    const float block_size, const float voxel_size,
+    CudaMeshBlock* cuda_mesh_blocks) {
+  // Block
+  const ColorBlock* color_block_ptr = color_blocks[blockIdx.x];
+  const Index3D block_index = block_indices[blockIdx.x];
+  CudaMeshBlock cuda_mesh_block = cuda_mesh_blocks[blockIdx.x];
+
+  // The position of this block in the layer
+  const Vector3f p_L_B_m = getPositionFromBlockIndex(block_size, block_index);
+
+  // Iterate through MeshBlock vertices - strided access pattern
+  for (int i = threadIdx.x; i < cuda_mesh_block.size; i += blockDim.x) {
+    // The position of this vertex in the layer
+    const Vector3f p_L_V_m = cuda_mesh_block.vertices[i];
+
+    // The position of this vertex in the block
+    const Vector3f p_B_V_m = p_L_V_m - p_L_B_m;
+
+    // Convert this to a voxel index
+    Index3D voxel_idx_in_block = (p_B_V_m.array() / voxel_size).cast<int>();
+
+    // NOTE(alexmillane): Here we make some assumptions.
+    // - We assume that the closest voxel to p_L_V is in the ColorBlock
+    //   co-located with the MeshBlock from which p_L_V was drawn.
+    // - This will (very?) occasionally be incorrect when mesh vertices
+    //   escape block boundaries. However, making this assumption saves us any
+    //   neighbour calculations.
+    constexpr size_t kVoxelsPerSideMinusOne =
+        VoxelBlock<ColorVoxel>::kVoxelsPerSide - 1;
+    voxel_idx_in_block =
+        voxel_idx_in_block.array().min(kVoxelsPerSideMinusOne).max(0);
+
+    // Get the color voxel
+    const ColorVoxel color_voxel =
+        color_block_ptr->voxels[voxel_idx_in_block.x()]  // NOLINT
+                               [voxel_idx_in_block.y()]  // NOLINT
+                               [voxel_idx_in_block.z()];
+
+    // Write the color out to global memory
+    cuda_mesh_block.colors[i] = color_voxel.color;
+  }
+}
+
+__global__ void colorMeshBlocksConstant(Color color,
+                                        CudaMeshBlock* cuda_mesh_blocks) {
+  // Each threadBlock operates on a single MeshBlock
+  CudaMeshBlock cuda_mesh_block = cuda_mesh_blocks[blockIdx.x];
+  // Iterate through MeshBlock vertices - strided access pattern
+  for (int i = threadIdx.x; i < cuda_mesh_block.size; i += blockDim.x) {
+    cuda_mesh_block.colors[i] = color;
+  }
+}
+
+void colorMeshBlocksConstantGPU(const std::vector<Index3D>& block_indices,
+                                const Color& color, MeshLayer* mesh_layer,
+                                cudaStream_t cuda_stream) {
+  CHECK_NOTNULL(mesh_layer);
+  if (block_indices.size() == 0) {
+    return;
+  }
+
+  // Prepare CudaMeshBlocks, which are effectively containers of device
+  // pointers
+  std::vector<CudaMeshBlock> cuda_mesh_blocks;
+  cuda_mesh_blocks.resize(block_indices.size());
+  for (int i = 0; i < block_indices.size(); i++) {
+    cuda_mesh_blocks[i] =
+        CudaMeshBlock(mesh_layer->getBlockAtIndex(block_indices[i]).get());
+  }
+
+  // Allocate
+  CudaMeshBlock* cuda_mesh_block_device_ptrs;
+  checkCudaErrors(cudaMalloc(&cuda_mesh_block_device_ptrs,
+                             cuda_mesh_blocks.size() * sizeof(CudaMeshBlock)));
+
+  // Host -> GPU
+  checkCudaErrors(
+      cudaMemcpyAsync(cuda_mesh_block_device_ptrs, cuda_mesh_blocks.data(),
+                      cuda_mesh_blocks.size() * sizeof(CudaMeshBlock),
+                      cudaMemcpyHostToDevice, cuda_stream));
+
+  // Kernel call - One ThreadBlock launched per VoxelBlock
+  constexpr int kThreadsPerBlock = 8 * 32;  // Chosen at random
+  const int num_blocks = block_indices.size();
+  colorMeshBlocksConstant<<<num_blocks, kThreadsPerBlock, 0, cuda_stream>>>(
+      color,  // NOLINT
+      cuda_mesh_block_device_ptrs);
+  checkCudaErrors(cudaStreamSynchronize(cuda_stream));
+  checkCudaErrors(cudaPeekAtLastError());
+
+  // Deallocate
+  checkCudaErrors(cudaFree(cuda_mesh_block_device_ptrs));
+}
+
+void colorMeshBlockByClosestColorVoxelGPU(
+    const ColorLayer& color_layer, const std::vector<Index3D>& block_indices,
+    MeshLayer* mesh_layer, cudaStream_t cuda_stream) {
+  CHECK_NOTNULL(mesh_layer);
+  if (block_indices.size() == 0) {
+    return;
+  }
+
+  // Get the locations (on device) of the color blocks
+  // NOTE(alexmillane): This function assumes that all block_indices have been
+  // checked to exist in color_layer.
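+  // (colorMeshGPU() below establishes this guarantee by splitting the
+  // requested indices into in-color-layer and not-in-color-layer groups.)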
+  std::vector<const ColorBlock*> color_blocks =
+      getBlockPtrsFromIndices(block_indices, color_layer);
+
+  // Prepare CudaMeshBlocks, which are effectively containers of device
+  // pointers
+  std::vector<CudaMeshBlock> cuda_mesh_blocks;
+  cuda_mesh_blocks.resize(block_indices.size());
+  for (int i = 0; i < block_indices.size(); i++) {
+    cuda_mesh_blocks[i] =
+        CudaMeshBlock(mesh_layer->getBlockAtIndex(block_indices[i]).get());
+  }
+
+  // Allocate
+  const ColorBlock** color_block_device_ptrs;
+  checkCudaErrors(cudaMalloc(&color_block_device_ptrs,
+                             color_blocks.size() * sizeof(ColorBlock*)));
+  Index3D* block_indices_device_ptr;
+  checkCudaErrors(cudaMalloc(&block_indices_device_ptr,
+                             block_indices.size() * sizeof(Index3D)));
+  CudaMeshBlock* cuda_mesh_block_device_ptrs;
+  checkCudaErrors(cudaMalloc(&cuda_mesh_block_device_ptrs,
+                             cuda_mesh_blocks.size() * sizeof(CudaMeshBlock)));
+
+  // Host -> GPU transfers
+  checkCudaErrors(cudaMemcpyAsync(color_block_device_ptrs, color_blocks.data(),
+                                  color_blocks.size() * sizeof(ColorBlock*),
+                                  cudaMemcpyHostToDevice, cuda_stream));
+  checkCudaErrors(cudaMemcpyAsync(block_indices_device_ptr,
+                                  block_indices.data(),
+                                  block_indices.size() * sizeof(Index3D),
+                                  cudaMemcpyHostToDevice, cuda_stream));
+  checkCudaErrors(
+      cudaMemcpyAsync(cuda_mesh_block_device_ptrs, cuda_mesh_blocks.data(),
+                      cuda_mesh_blocks.size() * sizeof(CudaMeshBlock),
+                      cudaMemcpyHostToDevice, cuda_stream));
+
+  // Kernel call - One ThreadBlock launched per VoxelBlock
+  constexpr int kThreadsPerBlock = 8 * 32;  // Chosen at random
+  const int num_blocks = block_indices.size();
+  const float voxel_size =
+      mesh_layer->block_size() / VoxelBlock<ColorVoxel>::kVoxelsPerSide;
+  colorMeshBlockByClosestColorVoxel<<<num_blocks, kThreadsPerBlock, 0,
+                                      cuda_stream>>>(
+      color_block_device_ptrs,   // NOLINT
+      block_indices_device_ptr,  // NOLINT
+      mesh_layer->block_size(),  // NOLINT
+      voxel_size,                // NOLINT
+      cuda_mesh_block_device_ptrs);
+  checkCudaErrors(cudaStreamSynchronize(cuda_stream));
+  checkCudaErrors(cudaPeekAtLastError());
+
+  // Deallocate
+  checkCudaErrors(cudaFree(color_block_device_ptrs));
+  checkCudaErrors(cudaFree(block_indices_device_ptr));
+  checkCudaErrors(cudaFree(cuda_mesh_block_device_ptrs));
+}
+
+void MeshIntegrator::colorMeshGPU(const ColorLayer& color_layer,
+                                  MeshLayer* mesh_layer) {
+  colorMeshGPU(color_layer, mesh_layer->getAllBlockIndices(), mesh_layer);
+}
+
+void MeshIntegrator::colorMeshGPU(
+    const ColorLayer& color_layer,
+    const std::vector<Index3D>& requested_block_indices,
+    MeshLayer* mesh_layer) {
+  CHECK_NOTNULL(mesh_layer);
+  CHECK_EQ(color_layer.block_size(), mesh_layer->block_size());
+
+  // NOTE(alexmillane): Generally, some of the MeshBlocks which we are
+  // "coloring" will not have data in the color layer. HOWEVER, for colored
+  // MeshBlocks (ie with non-empty color members), the size of the colors must
+  // match vertices. Therefore we "color" all requested block_indices in two
+  // parts:
+  // - The first part using the color layer, and
+  // - the second part a constant color.
+
+  // Check, for each index, that the MeshBlock exists, and if it does,
+  // allocate space for color.
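+  // (expandColorsToMatchVertices() sizes each block's color vector to its
+  // vertex count, so the kernels can write exactly one color per vertex.)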
+  std::vector<Index3D> block_indices;
+  block_indices.reserve(requested_block_indices.size());
+  std::for_each(
+      requested_block_indices.begin(), requested_block_indices.end(),
+      [&mesh_layer, &block_indices](const Index3D& block_idx) {
+        if (mesh_layer->isBlockAllocated(block_idx)) {
+          mesh_layer->getBlockAtIndex(block_idx)->expandColorsToMatchVertices();
+          block_indices.push_back(block_idx);
+        }
+      });
+
+  // Split block indices into two groups, one group containing indices with
+  // corresponding ColorBlocks, and one without.
+  std::vector<Index3D> block_indices_in_color_layer;
+  std::vector<Index3D> block_indices_not_in_color_layer;
+  block_indices_in_color_layer.reserve(block_indices.size());
+  block_indices_not_in_color_layer.reserve(block_indices.size());
+  for (const Index3D& block_idx : block_indices) {
+    if (color_layer.isBlockAllocated(block_idx)) {
+      block_indices_in_color_layer.push_back(block_idx);
+    } else {
+      block_indices_not_in_color_layer.push_back(block_idx);
+    }
+  }
+
+  // Color
+  colorMeshBlockByClosestColorVoxelGPU(
+      color_layer, block_indices_in_color_layer, mesh_layer, cuda_stream_);
+  colorMeshBlocksConstantGPU(block_indices_not_in_color_layer,
+                             default_mesh_color_, mesh_layer, cuda_stream_);
+}
+
+void MeshIntegrator::colorMeshCPU(const ColorLayer& color_layer,
+                                  BlockLayer<MeshBlock>* mesh_layer) {
+  colorMeshCPU(color_layer, mesh_layer->getAllBlockIndices(), mesh_layer);
+}
+
+void MeshIntegrator::colorMeshCPU(const ColorLayer& color_layer,
+                                  const std::vector<Index3D>& block_indices,
+                                  BlockLayer<MeshBlock>* mesh_layer) {
+  // For each vertex just grab the closest color
+  for (const Index3D& block_idx : block_indices) {
+    MeshBlock::Ptr block = mesh_layer->getBlockAtIndex(block_idx);
+    if (block == nullptr) {
+      continue;
+    }
+    block->colors.resize(block->vertices.size());
+    for (int i = 0; i < block->vertices.size(); i++) {
+      const Vector3f& vertex = block->vertices[i];
+      const ColorVoxel* color_voxel;
+      if (getVoxelAtPosition(color_layer, vertex, &color_voxel)) {
+        block->colors[i] = color_voxel->color;
+      } else {
+        block->colors[i] = Color::Gray();
+      }
+    }
+  }
+}
+
+}  // namespace nvblox
\ No newline at end of file
diff --git a/nvblox/src/primitives/primitives.cpp b/nvblox/src/primitives/primitives.cpp
new file mode 100644
index 00000000..efb097e4
--- /dev/null
+++ b/nvblox/src/primitives/primitives.cpp
@@ -0,0 +1,332 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+---- Original voxblox license, which this file is heavily based on: ----
+Copyright (c) 2016, ETHZ ASL
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+ +* Neither the name of voxblox nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +#include "nvblox/primitives/primitives.h" + +namespace nvblox { +namespace primitives { + +float Sphere::getDistanceToPoint(const Vector3f& point) const { + float distance = (center_ - point).norm() - radius_; + return distance; +} + +bool Sphere::getRayIntersection(const Vector3f& ray_origin, + const Vector3f& ray_direction, float max_dist, + Vector3f* intersect_point, + float* intersect_dist) const { + // From https://en.wikipedia.org/wiki/Line%E2%80%93sphere_intersection + // x = o + dl is the ray equation + // r = sphere radius, c = sphere center + float under_square_root = pow(ray_direction.dot(ray_origin - center_), 2) - + (ray_origin - center_).squaredNorm() + + pow(radius_, 2); + + // No real roots = no intersection. + if (under_square_root < 0.0) { + return false; + } + + float d = + -(ray_direction.dot(ray_origin - center_)) - sqrt(under_square_root); + + // Intersection behind the origin. + if (d < 0.0) { + return false; + } + // Intersection greater than max dist, so no intersection in the sensor + // range. + if (d > max_dist) { + return false; + } + + *intersect_point = ray_origin + d * ray_direction; + *intersect_dist = d; + return true; +} + +float Cube::getDistanceToPoint(const Vector3f& point) const { + // Solution from http://stackoverflow.com/questions/5254838/ + // calculating-distance-between-a-point-and-a-rectangular-box-nearest-point + + Vector3f distance_vector = Vector3f::Zero(); + distance_vector.x() = + std::max(std::max(center_.x() - size_.x() / 2.0 - point.x(), 0.0), + point.x() - center_.x() - size_.x() / 2.0); + distance_vector.y() = + std::max(std::max(center_.y() - size_.y() / 2.0 - point.y(), 0.0), + point.y() - center_.y() - size_.y() / 2.0); + distance_vector.z() = + std::max(std::max(center_.z() - size_.z() / 2.0 - point.z(), 0.0), + point.z() - center_.z() - size_.z() / 2.0); + + float distance = distance_vector.norm(); + + // Basically 0... Means it's inside! 
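+  // (The clamped per-axis distances above are all zero inside the cube, so
+  // we fall back to the largest unclamped per-axis signed distance, which is
+  // negative inside.)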
+ if (distance < kEpsilon) { + distance_vector.x() = std::max(center_.x() - size_.x() / 2.0 - point.x(), + point.x() - center_.x() - size_.x() / 2.0); + distance_vector.y() = std::max(center_.y() - size_.y() / 2.0 - point.y(), + point.y() - center_.y() - size_.y() / 2.0); + distance_vector.z() = std::max(center_.z() - size_.z() / 2.0 - point.z(), + point.z() - center_.z() - size_.z() / 2.0); + distance = distance_vector.maxCoeff(); + } + + return distance; +} + +bool Cube::getRayIntersection(const Vector3f& ray_origin, + const Vector3f& ray_direction, float max_dist, + Vector3f* intersect_point, + float* intersect_dist) const { + // Adapted from https://www.scratchapixel.com/lessons/3d-basic-rendering/ + // minimal-ray-tracer-rendering-simple-shapes/ray-box-intersection + // Compute min and max limits in 3D. + + // Precalculate signs and inverse directions. + Vector3f inv_dir(1.0 / ray_direction.x(), 1.0 / ray_direction.y(), + 1.0 / ray_direction.z()); + Eigen::Vector3i ray_sign(inv_dir.x() < 0.0, inv_dir.y() < 0.0, + inv_dir.z() < 0.0); + + Vector3f bounds[2]; + bounds[0] = center_ - size_ / 2.0; + bounds[1] = center_ + size_ / 2.0; + + float tmin = (bounds[ray_sign.x()].x() - ray_origin.x()) * inv_dir.x(); + float tmax = (bounds[1 - ray_sign.x()].x() - ray_origin.x()) * inv_dir.x(); + float tymin = (bounds[ray_sign.y()].y() - ray_origin.y()) * inv_dir.y(); + float tymax = (bounds[1 - ray_sign.y()].y() - ray_origin.y()) * inv_dir.y(); + + if ((tmin > tymax) || (tymin > tmax)) return false; + if (tymin > tmin) tmin = tymin; + if (tymax < tmax) tmax = tymax; + + float tzmin = (bounds[ray_sign.z()].z() - ray_origin.z()) * inv_dir.z(); + float tzmax = (bounds[1 - ray_sign.z()].z() - ray_origin.z()) * inv_dir.z(); + + if ((tmin > tzmax) || (tzmin > tmax)) return false; + if (tzmin > tmin) tmin = tzmin; + if (tzmax < tmax) tmax = tzmax; + + float t = tmin; + if (t < 0.0) { + t = tmax; + if (t < 0.0) { + return false; + } + } + + if (t > max_dist) { + return false; + } + + *intersect_dist = t; + *intersect_point = ray_origin + ray_direction * t; + + return true; +} + +float Plane::getDistanceToPoint(const Vector3f& point) const { + // Compute the 'd' in ax + by + cz + d = 0: + // This is actually the scalar product I guess. + float d = -normal_.dot(center_); + float p = d / normal_.norm(); + + float distance = normal_.dot(point) + p; + return distance; +} + +bool Plane::getRayIntersection(const Vector3f& ray_origin, + const Vector3f& ray_direction, float max_dist, + Vector3f* intersect_point, + float* intersect_dist) const { + // From https://en.wikipedia.org/wiki/Line%E2%80%93plane_intersection + // Following notation of sphere more... + // x = o + dl is the ray equation + // n = normal, c = plane 'origin' + float denominator = ray_direction.dot(normal_); + if (std::abs(denominator) < kEpsilon) { + // Lines are parallel, no intersection. + return false; + } + float d = (center_ - ray_origin).dot(normal_) / denominator; + if (d < 0.0) { + return false; + } + if (d > max_dist) { + return false; + } + *intersect_point = ray_origin + d * ray_direction; + *intersect_dist = d; + return true; +} + +float Cylinder::getDistanceToPoint(const Vector3f& point) const { + // From: https://math.stackexchange.com/questions/2064745/ + // 3 cases, depending on z of point. + // First case: in plane with the cylinder. This also takes care of inside + // case. 
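+  // (For that case the distance is the 2D radial distance to the axis minus
+  // the radius, which comes out negative inside the cylinder.)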
+  float distance = 0.0;
+
+  float min_z_limit = center_.z() - height_ / 2.0;
+  float max_z_limit = center_.z() + height_ / 2.0;
+  if (point.z() >= min_z_limit && point.z() <= max_z_limit) {
+    distance = (point.head<2>() - center_.head<2>()).norm() - radius_;
+  } else if (point.z() > max_z_limit) {
+    // Case 2: above the cylinder.
+    distance =
+        std::sqrt(std::max((point.head<2>() - center_.head<2>()).squaredNorm() -
+                               radius_ * radius_,
+                           static_cast<float>(0.0)) +
+                  (point.z() - max_z_limit) * (point.z() - max_z_limit));
+  } else {
+    // Case 3: below cylinder.
+    distance =
+        std::sqrt(std::max((point.head<2>() - center_.head<2>()).squaredNorm() -
+                               radius_ * radius_,
+                           static_cast<float>(0.0)) +
+                  (point.z() - min_z_limit) * (point.z() - min_z_limit));
+  }
+  return distance;
+}
+
+bool Cylinder::getRayIntersection(const Vector3f& ray_origin,
+                                  const Vector3f& ray_direction, float max_dist,
+                                  Vector3f* intersect_point,
+                                  float* intersect_dist) const {
+  // From http://woo4.me/wootracer/cylinder-intersection/
+  // and http://www.cl.cam.ac.uk/teaching/1999/AGraphHCI/SMAG/node2.html
+  // Define ray as P = E + tD, where E is ray_origin and D is ray_direction.
+  // We define our cylinder as centered in the xy coordinate system, so
+  // E in this case is actually ray_origin - center_.
+  Vector3f vector_E = ray_origin - center_;
+  Vector3f vector_D = ray_direction;  // Axis aligned.
+
+  float a = vector_D.x() * vector_D.x() + vector_D.y() * vector_D.y();
+  float b = 2 * vector_E.x() * vector_D.x() + 2 * vector_E.y() * vector_D.y();
+  float c = vector_E.x() * vector_E.x() + vector_E.y() * vector_E.y() -
+            radius_ * radius_;
+
+  // t = (-b +- sqrt(b^2 - 4ac))/2a
+  // t only has solutions if b^2 - 4ac >= 0
+  float t1 = -1.0;
+  float t2 = -1.0;
+
+  // Make sure we don't divide by 0.
+  if (std::abs(a) < kEpsilon) {
+    return false;
+  }
+
+  float under_square_root = b * b - 4 * a * c;
+  if (under_square_root < 0) {
+    return false;
+  }
+  if (under_square_root <= kEpsilon) {
+    t1 = -b / (2 * a);
+    // Just keep t2 at invalid default value.
+  } else {
+    // 2 ts.
+    t1 = (-b + std::sqrt(under_square_root)) / (2 * a);
+    t2 = (-b - std::sqrt(under_square_root)) / (2 * a);
+  }
+
+  // Great, now we got some ts, time to figure out whether we hit the cylinder
+  // or the endcaps.
+  float t = max_dist;
+
+  float z1 = vector_E.z() + t1 * vector_D.z();
+  float z2 = vector_E.z() + t2 * vector_D.z();
+  bool t1_valid = t1 >= 0.0 && z1 >= -height_ / 2.0 && z1 <= height_ / 2.0;
+  bool t2_valid = t2 >= 0.0 && z2 >= -height_ / 2.0 && z2 <= height_ / 2.0;
+
+  // Get the endcaps and their validity.
+  // Check end-cap intersections now... :(
+  float t3, t4;
+  bool t3_valid = false, t4_valid = false;
+
+  // Make sure we don't divide by 0.
+  if (std::abs(vector_D.z()) > kEpsilon) {
+    // t3 is the bottom end-cap, t4 is the top.
+    t3 = (-height_ / 2.0 - vector_E.z()) / vector_D.z();
+    t4 = (height_ / 2.0 - vector_E.z()) / vector_D.z();
+
+    Vector3f q3 = vector_E + t3 * vector_D;
+    Vector3f q4 = vector_E + t4 * vector_D;
+
+    t3_valid = t3 >= 0.0 && q3.head<2>().norm() < radius_;
+    t4_valid = t4 >= 0.0 && q4.head<2>().norm() < radius_;
+  }
+
+  if (!(t1_valid || t2_valid || t3_valid || t4_valid)) {
+    return false;
+  }
+  if (t1_valid) {
+    t = std::min(t, t1);
+  }
+  if (t2_valid) {
+    t = std::min(t, t2);
+  }
+  if (t3_valid) {
+    t = std::min(t, t3);
+  }
+  if (t4_valid) {
+    t = std::min(t, t4);
+  }
+
+  // Intersection greater than max dist, so no intersection in the sensor
+  // range.
+ if (t >= max_dist) { + return false; + } + + // Back to normal coordinates now. + *intersect_point = ray_origin + t * ray_direction; + *intersect_dist = t; + return true; +} + +} // namespace primitives +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/src/primitives/scene.cpp b/nvblox/src/primitives/scene.cpp new file mode 100644 index 00000000..51298290 --- /dev/null +++ b/nvblox/src/primitives/scene.cpp @@ -0,0 +1,161 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +---- Original voxblox license, which this file is heavily based on: ---- +Copyright (c) 2016, ETHZ ASL +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of voxblox nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+#include "nvblox/primitives/scene.h"
+
+namespace nvblox {
+namespace primitives {
+
+Scene::Scene() : aabb_(Vector3f(-5.0, -5.0, -1.0), Vector3f(5.0, 5.0, 9.0)) {}
+
+void Scene::addPrimitive(std::unique_ptr<Primitive> primitive) {
+  primitives_.emplace_back(std::move(primitive));
+}
+
+void Scene::addGroundLevel(float height) {
+  primitives_.emplace_back(
+      new Plane(Vector3f(0.0, 0.0, height), Vector3f(0.0, 0.0, 1.0)));
+}
+
+void Scene::addCeiling(float height) {
+  primitives_.emplace_back(
+      new Plane(Vector3f(0.0, 0.0, height), Vector3f(0.0, 0.0, -1.0)));
+}
+
+void Scene::addPlaneBoundaries(float x_min, float x_max, float y_min,
+                               float y_max) {
+  // X planes:
+  primitives_.emplace_back(
+      new Plane(Vector3f(x_min, 0.0, 0.0), Vector3f(1.0, 0.0, 0.0)));
+  primitives_.emplace_back(
+      new Plane(Vector3f(x_max, 0.0, 0.0), Vector3f(-1.0, 0.0, 0.0)));
+
+  // Y planes:
+  primitives_.emplace_back(
+      new Plane(Vector3f(0.0, y_min, 0.0), Vector3f(0.0, 1.0, 0.0)));
+  primitives_.emplace_back(
+      new Plane(Vector3f(0.0, y_max, 0.0), Vector3f(0.0, -1.0, 0.0)));
+}
+
+void Scene::clear() { primitives_.clear(); }
+
+float Scene::getSignedDistanceToPoint(const Vector3f& coords,
+                                      float max_dist) const {
+  float min_dist = max_dist;
+  for (const std::unique_ptr<Primitive>& primitive : primitives_) {
+    float primitive_dist = primitive->getDistanceToPoint(coords);
+    if (primitive_dist < min_dist) {
+      min_dist = primitive_dist;
+    }
+  }
+
+  return min_dist;
+}
+
+bool Scene::getRayIntersection(const Vector3f& ray_origin,
+                               const Vector3f& ray_direction, float max_dist,
+                               Vector3f* ray_intersection,
+                               float* ray_dist) const {
+  CHECK_NOTNULL(ray_intersection);
+  CHECK_NOTNULL(ray_dist);
+  *ray_intersection = Vector3f::Zero();
+  *ray_dist = max_dist;
+  bool ray_valid = false;
+  for (const std::unique_ptr<Primitive>& primitive : primitives_) {
+    Vector3f primitive_intersection;
+    float primitive_dist;
+    bool intersects =
+        primitive->getRayIntersection(ray_origin, ray_direction, max_dist,
+                                      &primitive_intersection, &primitive_dist);
+    if (intersects) {
+      if (!ray_valid || primitive_dist < *ray_dist) {
+        ray_valid = true;
+        *ray_dist = primitive_dist;
+        *ray_intersection = primitive_intersection;
+      }
+    }
+  }
+
+  return ray_valid;
+}
+
+void Scene::generateDepthImageFromScene(const Camera& camera,
+                                        const Transform& T_S_C, float max_dist,
+                                        DepthImage* depth_frame) const {
+  CHECK_NOTNULL(depth_frame);
+  CHECK(depth_frame->memory_type() == MemoryType::kUnified)
+      << "For scene generation DepthImage with memory_type == kUnified is "
+         "required.";
+  CHECK_EQ(depth_frame->rows(), camera.height());
+  CHECK_EQ(depth_frame->cols(), camera.width());
+
+  const Transform T_C_S = T_S_C.inverse();
+
+  // Iterate over the entire image.
+  Index2D u_C;
+  for (u_C.x() = 0; u_C.x() < camera.width(); u_C.x()++) {
+    for (u_C.y() = 0; u_C.y() < camera.height(); u_C.y()++) {
+      // Get the ray going through this pixel.
+      const Vector3f ray_direction =
+          T_S_C.linear() * camera.rayFromPixelIndices(u_C).normalized();
+      // Get the intersection point for this ray.
+      Vector3f ray_intersection;
+      float ray_dist;
+      if (getRayIntersection(T_S_C.translation(), ray_direction, max_dist,
+                             &ray_intersection, &ray_dist)) {
+        // The ray intersection is expressed in the world coordinate frame.
+        // We must transform it back to the camera coordinate frame.
+        const Vector3f p_C = T_C_S * ray_intersection;
+        // Then we use the z coordinate in the camera frame to set the depth.
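+        // (This is the pinhole z-depth, not the ray length ray_dist.)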
+        (*depth_frame)(u_C.y(), u_C.x()) = p_C.z();
+      } else {
+        // Otherwise set the depth to 0.0 to mark it as invalid.
+        (*depth_frame)(u_C.y(), u_C.x()) = 0.0f;
+      }
+    }
+  }
+}
+
+}  // namespace primitives
+}  // namespace nvblox
\ No newline at end of file
diff --git a/nvblox/src/ray_tracing/cuda/sphere_tracer.cu b/nvblox/src/ray_tracing/cuda/sphere_tracer.cu
new file mode 100644
index 00000000..de08621d
--- /dev/null
+++ b/nvblox/src/ray_tracing/cuda/sphere_tracer.cu
@@ -0,0 +1,278 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "nvblox/ray_tracing/sphere_tracer.h"
+
+#include <cuda_runtime.h>
+#include <thrust/pair.h>
+#include <memory>
+
+#include "nvblox/gpu_hash/cuda/gpu_hash_interface.cuh"
+#include "nvblox/gpu_hash/cuda/gpu_indexing.cuh"
+#include "nvblox/utils/timing.h"
+
+namespace nvblox {
+
+__device__ inline bool isTsdfVoxelValid(const TsdfVoxel& voxel) {
+  constexpr float kMinWeight = 1e-4;
+  return voxel.weight > kMinWeight;
+}
+
+__device__ thrust::pair<float, bool> cast(
+    const Ray& ray,                                // NOLINT
+    Index3DDeviceHashMapType<TsdfBlock> block_hash,  // NOLINT
+    float truncation_distance_m,                   // NOLINT
+    float block_size_m,                            // NOLINT
+    int maximum_steps,                             // NOLINT
+    float maximum_ray_length_m,                    // NOLINT
+    float surface_distance_epsilon_m) {
+  // Approach: Step along the ray until we find the surface, or fail to find
+  // it.
+  bool last_distance_positive = false;
+  // t captures the parameter scaling along ray.direction. We assume
+  // that the ray is normalized such that t has units of meters.
+  float t = 0.0f;
+  for (int i = 0; (i < maximum_steps) && (t < maximum_ray_length_m); i++) {
+    // Current point to sample
+    const Vector3f p_L = ray.origin + t * ray.direction;
+
+    // Evaluate the distance at this point
+    float step;
+    TsdfVoxel* voxel_ptr;
+
+    // Can't get distance, let's see what to do...
+    if (!getVoxelAtPosition(block_hash, p_L, block_size_m, &voxel_ptr) ||
+        !isTsdfVoxelValid(*voxel_ptr)) {
+      // 1) We weren't in observed space before this, let's step through this
+      // (unobserved) space and hope to hit something allocated.
+      if (!last_distance_positive) {
+        // step forward by the truncation distance
+        step = truncation_distance_m;
+        last_distance_positive = false;
+      }
+      // 2) We were in observed space, now we've left it... let's kill this
+      // ray, it's risky to continue.
+      // Note(alexmillane): The "risk" here is that we've somehow passed
+      // through the truncation band. This occurs occasionally. The risk
+      // of continuing is that we can then see through an object. It's safer
+      // to stop here and hope for better luck in the next frame.
+      else {
+        return {t, false};
+      }
+    }
+    // We got a valid distance
+    else {
+      // Distance negative (or close to it)!
+      // We're gonna terminate, let's determine how.
+      if (voxel_ptr->distance < surface_distance_epsilon_m) {
+        // 1) We found a zero crossing. Terminate successfully.
+        if (last_distance_positive) {
+          // We "refine" the distance by back stepping the (now negative)
+          // distance value
+          t += voxel_ptr->distance;
+          // Output - Success!
+          return {t, true};
+        }
+        // 2) We just went from unobserved to negative. We're observing
+        // something from behind, terminate.
+        else {
+          return {t, false};
+        }
+      }
+      // Distance positive!
+      else {
+        // Step by this amount
+        step = voxel_ptr->distance;
+        last_distance_positive = true;
+      }
+    }
+
+    // Step further along the ray
+    t += step;
+  }
+  // Ran out of number of steps or distance... Fail
+  return {t, false};
+}
+
+__global__ void sphereTracingKernel(
+    const Ray ray,                                 // NOLINT
+    Index3DDeviceHashMapType<TsdfBlock> block_hash,  // NOLINT
+    float* t,                                      // NOLINT
+    bool* success_flag,                            // NOLINT
+    float truncation_distance_m,                   // NOLINT
+    float block_size_m,                            // NOLINT
+    int maximum_steps,                             // NOLINT
+    float maximum_ray_length_m,                    // NOLINT
+    float surface_distance_epsilon_m) {
+  const int idx = threadIdx.x + blockIdx.x * blockDim.x;
+  if (idx != 0) return;
+
+  thrust::pair<float, bool> res =
+      cast(ray, block_hash, truncation_distance_m, block_size_m, maximum_steps,
+           maximum_ray_length_m, surface_distance_epsilon_m);
+
+  *t = res.first;
+  *success_flag = res.second;
+}
+
+__global__ void sphereTraceImageKernel(
+    const Camera camera,                           // NOLINT
+    const Transform T_S_C,                         // NOLINT
+    Index3DDeviceHashMapType<TsdfBlock> block_hash,  // NOLINT
+    float* image,                                  // NOLINT
+    float truncation_distance_m,                   // NOLINT
+    float block_size_m,                            // NOLINT
+    int maximum_steps,                             // NOLINT
+    float maximum_ray_length_m,                    // NOLINT
+    float surface_distance_epsilon_m,              // NOLINT
+    int ray_subsampling_factor) {
+  const int ray_col_idx = threadIdx.x + blockIdx.x * blockDim.x;
+  const int ray_row_idx = threadIdx.y + blockIdx.y * blockDim.y;
+  // Note: we ensure that this division works cleanly before getting here.
+  const int ray_rows = camera.rows() / ray_subsampling_factor;
+  const int ray_cols = camera.cols() / ray_subsampling_factor;
+  if ((ray_row_idx >= ray_rows) || (ray_col_idx >= ray_cols)) {
+    return;
+  }
+
+  // Get the image-plane coordinates of where this ray should pass such that it
+  // is in the center of the patch it will represent.
+  constexpr float kHalf = 1.0f / 2.0f;
+  const Index2D ray_indices(ray_col_idx, ray_row_idx);
+  const Vector2f pixel_coords =
+      (ray_indices * ray_subsampling_factor).cast<float>() +
+      kHalf * static_cast<float>(ray_subsampling_factor) * Vector2f::Ones();
+
+  // Get the ray going through this pixel (in layer coordinates)
+  const Vector3f ray_direction_C =
+      camera.rayFromImagePlaneCoordinates(pixel_coords).normalized();
+  const Ray ray_L{T_S_C.linear() * ray_direction_C, T_S_C.translation()};
+
+  // Cast the ray into the layer
+  thrust::pair<float, bool> t_optional =
+      cast(ray_L, block_hash, truncation_distance_m, block_size_m,
+           maximum_steps, maximum_ray_length_m, surface_distance_epsilon_m);
+
+  // If success, write depth to image, otherwise write -1.
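+  // (t is a length along the unit ray; multiplying by the ray's camera-frame
+  // z component converts it to pinhole z-depth.)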
+  if (t_optional.second == true) {
+    const float depth = t_optional.first * ray_direction_C.z();
+    image::access(ray_row_idx, ray_col_idx, ray_cols, image) = depth;
+  } else {
+    image::access(ray_row_idx, ray_col_idx, ray_cols, image) = -1.0f;
+  }
+}
+
+SphereTracer::SphereTracer(Params params) : params_(std::move(params)) {
+  checkCudaErrors(cudaStreamCreate(&tracing_stream_));
+  cudaMalloc(&t_device_, sizeof(float));
+  cudaMalloc(&success_flag_device_, sizeof(bool));
+}
+
+SphereTracer::~SphereTracer() {
+  cudaStreamSynchronize(tracing_stream_);
+  cudaFree(t_device_);
+  cudaFree(success_flag_device_);
+  checkCudaErrors(cudaStreamDestroy(tracing_stream_));
+}
+
+bool SphereTracer::castOnGPU(const Ray& ray, const TsdfLayer& tsdf_layer,
+                             const float truncation_distance_m,
+                             float* t) const {
+  constexpr float eps = 1e-5;
+  CHECK_NEAR(ray.direction.norm(), 1.0, eps);
+
+  // Get the GPU hash
+  GPULayerView<TsdfBlock> gpu_layer_view = tsdf_layer.getGpuLayerView();
+
+  // Kernel
+  const float surface_distance_epsilon_m =
+      params_.surface_distance_epsilon_vox * tsdf_layer.voxel_size();
+  sphereTracingKernel<<<1, 1, 0, tracing_stream_>>>(
+      ray,                             // NOLINT
+      gpu_layer_view.getHash().impl_,  // NOLINT
+      t_device_,                       // NOLINT
+      success_flag_device_,            // NOLINT
+      truncation_distance_m,           // NOLINT
+      gpu_layer_view.block_size(),     // NOLINT
+      params_.maximum_steps,           // NOLINT
+      params_.maximum_ray_length_m,    // NOLINT
+      surface_distance_epsilon_m);
+
+  // GPU -> CPU
+  cudaMemcpyAsync(t, t_device_, sizeof(float), cudaMemcpyDeviceToHost,
+                  tracing_stream_);
+
+  checkCudaErrors(cudaStreamSynchronize(tracing_stream_));
+  checkCudaErrors(cudaPeekAtLastError());
+
+  return true;
+}
+
+std::shared_ptr<DepthImage> SphereTracer::renderImageOnGPU(
+    const Camera& camera, const Transform& T_S_C, const TsdfLayer& tsdf_layer,
+    const float truncation_distance_m,
+    const MemoryType output_image_memory_type,
+    const int ray_subsampling_factor) {
+  CHECK_EQ(camera.width() % ray_subsampling_factor, 0);
+  CHECK_EQ(camera.height() % ray_subsampling_factor, 0);
+  // Output space
+  const int image_height = camera.height() / ray_subsampling_factor;
+  const int image_width = camera.width() / ray_subsampling_factor;
+  // If we get a request for a different size image, reallocate.
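+  // (The output image is cached between calls and only reallocated when the
+  // requested size or memory type changes.)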
+  if (!depth_image_ || depth_image_->width() != image_width ||
+      depth_image_->height() != image_height ||
+      depth_image_->memory_type() != output_image_memory_type) {
+    depth_image_ = std::make_shared<DepthImage>(image_height, image_width,
+                                                output_image_memory_type);
+  }
+
+  // Get the GPU hash
+  timing::Timer hash_transfer_timer(
+      "color/integrate/sphere_trace/hash_transfer");
+  GPULayerView<TsdfBlock> gpu_layer_view = tsdf_layer.getGpuLayerView();
+  hash_transfer_timer.Stop();
+
+  // Get metric surface distance epsilon
+  const float surface_distance_epsilon_m =
+      params_.surface_distance_epsilon_vox * tsdf_layer.voxel_size();
+
+  // Kernel
+  // Call params
+  // - 1 thread per pixel
+  // - 8 x 8 threads per thread block
+  // - N x M thread blocks get 1 thread per pixel
+  constexpr dim3 kThreadsPerThreadBlock(8, 8, 1);
+  const dim3 num_blocks(
+      depth_image_->cols() / kThreadsPerThreadBlock.y + 1,  // NOLINT
+      depth_image_->rows() / kThreadsPerThreadBlock.x + 1,  // NOLINT
+      1);
+  sphereTraceImageKernel<<<num_blocks, kThreadsPerThreadBlock, 0,
+                           tracing_stream_>>>(
+      camera,                          // NOLINT
+      T_S_C,                           // NOLINT
+      gpu_layer_view.getHash().impl_,  // NOLINT
+      depth_image_->dataPtr(),         // NOLINT
+      truncation_distance_m,           // NOLINT
+      gpu_layer_view.block_size(),     // NOLINT
+      params_.maximum_steps,           // NOLINT
+      params_.maximum_ray_length_m,    // NOLINT
+      surface_distance_epsilon_m,      // NOLINT
+      ray_subsampling_factor);
+  checkCudaErrors(cudaStreamSynchronize(tracing_stream_));
+  checkCudaErrors(cudaPeekAtLastError());
+
+  return depth_image_;
+}
+
+}  // namespace nvblox
diff --git a/nvblox/src/utils/nvtx_ranges.cpp b/nvblox/src/utils/nvtx_ranges.cpp
new file mode 100644
index 00000000..86b6acf3
--- /dev/null
+++ b/nvblox/src/utils/nvtx_ranges.cpp
@@ -0,0 +1,106 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "nvblox/utils/nvtx_ranges.h"
+
+#include <nvToolsExt.h>
+
+#include <string>
+
+namespace nvblox {
+namespace timing {
+
+uint32_t colorToUint32(const Color& color) {
+  return 0xFF << 24 |                              // NOLINT
+         static_cast<uint32_t>(color.r) << 16 |    // NOLINT
+         static_cast<uint32_t>(color.g) << 8 |     // NOLINT
+         static_cast<uint32_t>(color.b);           // NOLINT
+}
+
+const uint32_t nvtx_ranges_colors[] = {
+    colorToUint32(Color::Black()),  colorToUint32(Color::Gray()),
+    colorToUint32(Color::Red()),    colorToUint32(Color::Green()),
+    colorToUint32(Color::Blue()),   colorToUint32(Color::Yellow()),
+    colorToUint32(Color::Orange()), colorToUint32(Color::Purple()),
+    colorToUint32(Color::Teal()),   colorToUint32(Color::Pink())};
+const int num_colors = sizeof(nvtx_ranges_colors) / sizeof(uint32_t);
+
+uint32_t colorFromString(const std::string& str) {
+  return nvtx_ranges_colors[std::hash<std::string>{}(str) % num_colors];
+}
+
+NvtxRange::NvtxRange(const std::string& message, const Color& color,
+                     bool construct_stopped)
+    : started_(false), event_attributes_{0} {
+  Init(message, color);
+  if (!construct_stopped) Start();
+}
+
+NvtxRange::NvtxRange(const std::string& message, bool construct_stopped)
+    : started_(false), event_attributes_{0} {
+  Init(message, colorFromString(message));
+  if (!construct_stopped) Start();
+}
+
+NvtxRange::~NvtxRange() { Stop(); }
+
+void NvtxRange::Start() {
+  started_ = true;
+  id_ = nvtxRangeStartEx(&event_attributes_);
+}
+
+void NvtxRange::Stop() {
+  if (started_) nvtxRangeEnd(id_);
+  started_ = false;
+}
+
+void NvtxRange::Init(const std::string& message, const Color& color) {
+  Init(message, colorToUint32(color));
+}
+
+void NvtxRange::Init(const std::string& message, const uint32_t color) {
+  // Initialize
+  event_attributes_ = nvtxEventAttributes_t{0};
+  event_attributes_.version = NVTX_VERSION;
+  event_attributes_.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+  // Configure the Attributes
+  event_attributes_.colorType = NVTX_COLOR_ARGB;
+  event_attributes_.color = color;
+  event_attributes_.messageType = NVTX_MESSAGE_TYPE_ASCII;
+  event_attributes_.message.ascii = message.c_str();
+}
+
+void mark(const std::string& message, const uint32_t color) {
+  // Initialize
+  nvtxEventAttributes_t event_attributes = {0};
+  event_attributes.version = NVTX_VERSION;
+  event_attributes.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+  event_attributes.colorType = NVTX_COLOR_ARGB;
+  event_attributes.color = color;
+  event_attributes.messageType = NVTX_MESSAGE_TYPE_ASCII;
+  event_attributes.message.ascii = message.c_str();
+  nvtxMarkEx(&event_attributes);
+}
+
+void mark(const std::string& message) {
+  mark(message, colorFromString(message));
+}
+
+void mark(const std::string& message, const Color& color) {
+  mark(message, colorToUint32(color));
+}
+
+}  // namespace timing
+}  // namespace nvblox
diff --git a/nvblox/src/utils/timing.cpp b/nvblox/src/utils/timing.cpp
new file mode 100644
index 00000000..fafa535c
--- /dev/null
+++ b/nvblox/src/utils/timing.cpp
@@ -0,0 +1,258 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+---- Original license for this file: ----
+
+ * Copyright (C) 2012-2013 Simon Lynen, ASL, ETH Zurich, Switzerland
+ * You can contact the author at
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Adapted from Paul Furgale Schweizer Messer sm_timing*/
+
+#include "nvblox/utils/timing.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <algorithm>
+#include <mutex>
+#include <ostream>
+#include <sstream>
+
+namespace nvblox {
+
+namespace timing {
+
+const double kNumSecondsPerNanosecond = 1.e-9;
+
+Timing& Timing::Instance() {
+  static Timing t;
+  return t;
+}
+
+Timing::Timing() : maxTagLength_(0) {}
+
+Timing::~Timing() {}
+
+// Static functions to query the timers:
+size_t Timing::GetHandle(std::string const& tag) {
+  std::lock_guard<std::mutex> lock(Instance().mutex_);
+  // Search for an existing tag.
+  map_t::iterator i = Instance().tagMap_.find(tag);
+  if (i == Instance().tagMap_.end()) {
+    // If it is not there, create a tag.
+    size_t handle = Instance().timers_.size();
+    Instance().tagMap_[tag] = handle;
+    Instance().timers_.push_back(TimerMapValue());
+    // Track the maximum tag length to help printing a table of timing values
+    // later.
+    Instance().maxTagLength_ = std::max(Instance().maxTagLength_, tag.size());
+    return handle;
+  } else {
+    return i->second;
+  }
+}
+
+std::string Timing::GetTag(size_t handle) {
+  std::lock_guard<std::mutex> lock(Instance().mutex_);
+  std::string tag;
+
+  // Perform a linear search for the tag.
+  for (typename map_t::value_type current_tag : Instance().tagMap_) {
+    if (current_tag.second == handle) {
+      return current_tag.first;
+    }
+  }
+  return tag;
+}
+
+// Class functions used for timing.
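+// Usage sketch (illustrative, mirroring how the mesh integrator above uses
+// these timers): construct a named timer around a scope, stop it explicitly
+// or let the destructor do so, then print the accumulated statistics.
+//
+//   timing::Timer timer("mesh/gpu/integrate");
+//   // ... timed work ...
+//   timer.Stop();
+//   std::cout << timing::Timing::Print();
+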
+TimerChrono::TimerChrono(size_t handle, bool constructStopped)
+    : timing_(false), handle_(handle) {
+  if (!constructStopped) Start();
+}
+
+TimerChrono::TimerChrono(std::string const& tag, bool constructStopped)
+    : timing_(false), handle_(Timing::GetHandle(tag)) {
+  if (!constructStopped) Start();
+}
+
+TimerChrono::~TimerChrono() {
+  if (IsTiming()) Stop();
+}
+
+void TimerChrono::Start() {
+  timing_ = true;
+  time_ = std::chrono::system_clock::now();
+}
+
+void TimerChrono::Stop() {
+  std::chrono::time_point<std::chrono::system_clock> now =
+      std::chrono::system_clock::now();
+  double dt =
+      static_cast<double>(
+          std::chrono::duration_cast<std::chrono::nanoseconds>(now - time_)
+              .count()) *
+      kNumSecondsPerNanosecond;
+
+  Timing::Instance().AddTime(handle_, dt);
+  timing_ = false;
+}
+
+bool TimerChrono::IsTiming() const { return timing_; }
+
+TimerNvtx::TimerNvtx(std::string const& tag, bool constructStopped)
+    : timer_(tag, constructStopped), nvtx_range_(tag, constructStopped) {}
+
+TimerNvtx::TimerNvtx(std::string const& tag, const Color& color,
+                     bool constructStopped)
+    : timer_(tag, constructStopped),
+      nvtx_range_(tag, color, constructStopped) {}
+
+void TimerNvtx::Start() {
+  timer_.Start();
+  nvtx_range_.Start();
+}
+
+void TimerNvtx::Stop() {
+  timer_.Stop();
+  nvtx_range_.Stop();
+}
+
+bool TimerNvtx::IsTiming() const { return timer_.IsTiming(); }
+
+void Timing::AddTime(size_t handle, double seconds) {
+  std::lock_guard<std::mutex> lock(Instance().mutex_);
+  timers_[handle].acc_.Add(seconds);
+}
+
+double Timing::GetTotalSeconds(size_t handle) {
+  std::lock_guard<std::mutex> lock(Instance().mutex_);
+  return Instance().timers_[handle].acc_.Sum();
+}
+double Timing::GetTotalSeconds(std::string const& tag) {
+  return GetTotalSeconds(GetHandle(tag));
+}
+double Timing::GetMeanSeconds(size_t handle) {
+  std::lock_guard<std::mutex> lock(Instance().mutex_);
+  return Instance().timers_[handle].acc_.Mean();
+}
+double Timing::GetMeanSeconds(std::string const& tag) {
+  return GetMeanSeconds(GetHandle(tag));
+}
+size_t Timing::GetNumSamples(size_t handle) {
+  return Instance().timers_[handle].acc_.TotalSamples();
+}
+size_t Timing::GetNumSamples(std::string const& tag) {
+  return GetNumSamples(GetHandle(tag));
+}
+double Timing::GetVarianceSeconds(size_t handle) {
+  std::lock_guard<std::mutex> lock(Instance().mutex_);
+  return Instance().timers_[handle].acc_.LazyVariance();
+}
+double Timing::GetVarianceSeconds(std::string const& tag) {
+  return GetVarianceSeconds(GetHandle(tag));
+}
+double Timing::GetMinSeconds(size_t handle) {
+  std::lock_guard<std::mutex> lock(Instance().mutex_);
+  return Instance().timers_[handle].acc_.Min();
+}
+double Timing::GetMinSeconds(std::string const& tag) {
+  return GetMinSeconds(GetHandle(tag));
+}
+double Timing::GetMaxSeconds(size_t handle) {
+  std::lock_guard<std::mutex> lock(Instance().mutex_);
+  return Instance().timers_[handle].acc_.Max();
+}
+double Timing::GetMaxSeconds(std::string const& tag) {
+  return GetMaxSeconds(GetHandle(tag));
+}
+
+double Timing::GetHz(size_t handle) {
+  std::lock_guard<std::mutex> lock(Instance().mutex_);
+  const double rolling_mean = Instance().timers_[handle].acc_.RollingMean();
+  CHECK_GT(rolling_mean, 0.0);
+  return 1.0 / rolling_mean;
+}
+
+double Timing::GetHz(std::string const& tag) { return GetHz(GetHandle(tag)); }
+
+std::string Timing::SecondsToTimeString(double seconds) {
+  char buffer[256];
+  snprintf(buffer, sizeof(buffer), "%09.6f", seconds);
+  return buffer;
+}
+
+void Timing::Print(std::ostream& out) {
+  map_t& tagMap = Instance().tagMap_;
+
+  if (tagMap.empty()) {
+    return;
+  }
+
+  out << "NVBlox Timing\n";
+  out << "-----------\n";
+  for (typename map_t::value_type t : tagMap) {
(typename map_t::value_type t : tagMap) { + size_t i = t.second; + out.width((std::streamsize)Instance().maxTagLength_); + out.setf(std::ios::left, std::ios::adjustfield); + out << t.first << "\t"; + out.width(7); + + out.setf(std::ios::right, std::ios::adjustfield); + out << GetNumSamples(i) << "\t"; + if (GetNumSamples(i) > 0) { + out << SecondsToTimeString(GetTotalSeconds(i)) << "\t"; + double meansec = GetMeanSeconds(i); + double stddev = sqrt(GetVarianceSeconds(i)); + out << "(" << SecondsToTimeString(meansec) << " +- "; + out << SecondsToTimeString(stddev) << ")\t"; + + double minsec = GetMinSeconds(i); + double maxsec = GetMaxSeconds(i); + + // Print the observed min and max values. + out << "[" << SecondsToTimeString(minsec) << "," + << SecondsToTimeString(maxsec) << "]"; + } + out << std::endl; + } +} +std::string Timing::Print() { + std::stringstream ss; + Print(ss); + return ss.str(); +} + +void Timing::Reset() { + std::lock_guard<std::mutex> lock(Instance().mutex_); + Instance().tagMap_.clear(); +} + +} // namespace timing +} // namespace nvblox diff --git a/nvblox/tests/CMakeLists.txt b/nvblox/tests/CMakeLists.txt new file mode 100644 index 00000000..2d4556b4 --- /dev/null +++ b/nvblox/tests/CMakeLists.txt @@ -0,0 +1,120 @@ +find_package(GTest REQUIRED) +enable_testing() + +# Copy test data into the testing space (build/tests) +file(INSTALL "${CMAKE_CURRENT_SOURCE_DIR}/data" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}") + +# A library containing functions used by many tests. +add_library(nvblox_test_utils SHARED + lib/utils.cpp + lib/projective_tsdf_integrator_cpu.cpp + lib/cuda/increment_kernel.cu + lib/cuda/gpu_image_routines.cu + lib/cuda/interpolation_2d_gpu.cu + lib/cuda/projective_tsdf_integrator_cuda_components.cu + lib/cuda/test_utils_cuda.cu + lib/cuda/blox.cu + lib/cuda/blox_utils.cu + lib/cuda/gpu_layer_utils.cu + lib/cuda/gpu_indexing.cu +) +target_include_directories(nvblox_test_utils PUBLIC + include + ${GTEST_INCLUDE_DIRS} +) +target_link_libraries(nvblox_test_utils + nvblox_lib ${GTEST_LIBRARIES} ${CUDA_LIBRARIES} pthread +) +set_target_properties(nvblox_test_utils PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + +set(TEST_OPTIONS DISCOVERY_TIMEOUT 30) + +# The tests +add_executable(test_camera test_camera.cpp) +target_link_libraries(test_camera nvblox_test_utils) +gtest_discover_tests(test_camera ${TEST_OPTIONS}) + +add_executable(test_indexing test_indexing.cpp) +target_link_libraries(test_indexing nvblox_test_utils) +gtest_discover_tests(test_indexing ${TEST_OPTIONS}) + +add_executable(test_layer test_layer.cpp) +target_link_libraries(test_layer nvblox_test_utils) +gtest_discover_tests(test_layer ${TEST_OPTIONS}) + +add_executable(test_3d_interpolation test_3d_interpolation.cpp) +target_link_libraries(test_3d_interpolation nvblox_test_utils) +gtest_discover_tests(test_3d_interpolation ${TEST_OPTIONS}) + +add_executable(test_tsdf_integrator test_tsdf_integrator.cpp) +target_link_libraries(test_tsdf_integrator nvblox_test_utils) +gtest_discover_tests(test_tsdf_integrator ${TEST_OPTIONS}) + +add_executable(test_3dmatch test_3dmatch.cpp) +target_link_libraries(test_3dmatch nvblox_test_utils nvblox_lib) +gtest_discover_tests(test_3dmatch ${TEST_OPTIONS}) +set_target_properties(test_3dmatch PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + +add_executable(test_unified_ptr test_unified_ptr.cpp) +target_link_libraries(test_unified_ptr nvblox_test_utils) +gtest_discover_tests(test_unified_ptr ${TEST_OPTIONS}) + +add_executable(test_mesh test_mesh.cpp) +target_link_libraries(test_mesh
nvblox_test_utils) +gtest_discover_tests(test_mesh ${TEST_OPTIONS}) + +add_executable(test_scene test_scene.cpp) +target_link_libraries(test_scene nvblox_test_utils) +gtest_discover_tests(test_scene ${TEST_OPTIONS}) + +add_executable(test_depth_image test_depth_image.cpp) +target_link_libraries(test_depth_image nvblox_test_utils) +gtest_discover_tests(test_depth_image ${TEST_OPTIONS}) + +add_executable(test_tsdf_integrator_cuda_components test_tsdf_integrator_cuda_components.cpp) +target_link_libraries(test_tsdf_integrator_cuda_components nvblox_test_utils) +gtest_discover_tests(test_tsdf_integrator_cuda_components ${TEST_OPTIONS}) + +add_executable(test_unified_vector test_unified_vector.cpp) +target_link_libraries(test_unified_vector nvblox_test_utils) +gtest_discover_tests(test_unified_vector ${TEST_OPTIONS}) + +add_executable(test_esdf_integrator test_esdf_integrator.cpp) +target_link_libraries(test_esdf_integrator nvblox_test_utils) +gtest_discover_tests(test_esdf_integrator ${TEST_OPTIONS}) + +add_executable(test_color_image test_color_image.cpp) +target_link_libraries(test_color_image nvblox_test_utils) +gtest_discover_tests(test_color_image ${TEST_OPTIONS}) + +add_executable(test_color_integrator test_color_integrator.cpp) +target_link_libraries(test_color_integrator nvblox_test_utils) +gtest_discover_tests(test_color_integrator ${TEST_OPTIONS}) + +add_executable(test_mesh_coloring test_mesh_coloring.cpp) +target_link_libraries(test_mesh_coloring nvblox_test_utils) +gtest_discover_tests(test_mesh_coloring ${TEST_OPTIONS}) + +add_executable(test_for_memory_leaks test_for_memory_leaks.cpp) +target_link_libraries(test_for_memory_leaks nvblox_test_utils) +gtest_discover_tests(test_for_memory_leaks ${TEST_OPTIONS}) + +add_executable(test_frustum test_frustum.cpp) +target_link_libraries(test_frustum nvblox_test_utils) +gtest_discover_tests(test_frustum ${TEST_OPTIONS}) + +add_executable(test_gpu_layer_view test_gpu_layer_view.cpp) +target_link_libraries(test_gpu_layer_view nvblox_test_utils) +gtest_discover_tests(test_gpu_layer_view ${TEST_OPTIONS}) + +add_executable(test_sphere_tracing test_sphere_tracing.cpp) +target_link_libraries(test_sphere_tracing nvblox_test_utils) +gtest_discover_tests(test_sphere_tracing ${TEST_OPTIONS}) + +add_executable(test_cake test_cake.cpp) +target_link_libraries(test_cake nvblox_test_utils) +gtest_discover_tests(test_cake ${TEST_OPTIONS}) + +add_executable(test_traits test_traits.cpp) +target_link_libraries(test_traits nvblox_test_utils) +gtest_discover_tests(test_traits ${TEST_OPTIONS}) diff --git a/nvblox/tests/data/3dmatch/camera-intrinsics.txt b/nvblox/tests/data/3dmatch/camera-intrinsics.txt new file mode 100644 index 00000000..de8eb850 --- /dev/null +++ b/nvblox/tests/data/3dmatch/camera-intrinsics.txt @@ -0,0 +1,3 @@ + 5.70342205e+02 0.00000000e+00 3.20000000e+02 + 0.00000000e+00 5.70342205e+02 2.40000000e+02 + 0.00000000e+00 0.00000000e+00 1.00000000e+00 diff --git a/nvblox/tests/data/3dmatch/seq-01/frame-000000.color.png b/nvblox/tests/data/3dmatch/seq-01/frame-000000.color.png new file mode 100644 index 00000000..c28ce57b Binary files /dev/null and b/nvblox/tests/data/3dmatch/seq-01/frame-000000.color.png differ diff --git a/nvblox/tests/data/3dmatch/seq-01/frame-000000.depth.png b/nvblox/tests/data/3dmatch/seq-01/frame-000000.depth.png new file mode 100644 index 00000000..da1962a9 Binary files /dev/null and b/nvblox/tests/data/3dmatch/seq-01/frame-000000.depth.png differ diff --git a/nvblox/tests/data/3dmatch/seq-01/frame-000000.pose.txt 
b/nvblox/tests/data/3dmatch/seq-01/frame-000000.pose.txt new file mode 100644 index 00000000..3f98b68e --- /dev/null +++ b/nvblox/tests/data/3dmatch/seq-01/frame-000000.pose.txt @@ -0,0 +1,4 @@ + 3.13181000e-01 3.09473000e-01 -8.97856000e-01 1.97304600e+00 +-8.73910000e-02 -9.32015000e-01 -3.51729000e-01 1.12573400e+00 +-9.45665000e-01 1.88620000e-01 -2.64844000e-01 3.09820000e-01 + 0.00000000e+00 0.00000000e+00 0.00000000e+00 1.00000000e+00 diff --git a/nvblox/tests/data/3dmatch/seq-01/frame-000001.color.png b/nvblox/tests/data/3dmatch/seq-01/frame-000001.color.png new file mode 100644 index 00000000..ab3031e6 Binary files /dev/null and b/nvblox/tests/data/3dmatch/seq-01/frame-000001.color.png differ diff --git a/nvblox/tests/data/3dmatch/seq-01/frame-000001.depth.png b/nvblox/tests/data/3dmatch/seq-01/frame-000001.depth.png new file mode 100644 index 00000000..578292d7 Binary files /dev/null and b/nvblox/tests/data/3dmatch/seq-01/frame-000001.depth.png differ diff --git a/nvblox/tests/data/3dmatch/seq-01/frame-000001.pose.txt b/nvblox/tests/data/3dmatch/seq-01/frame-000001.pose.txt new file mode 100644 index 00000000..25a0f7c6 --- /dev/null +++ b/nvblox/tests/data/3dmatch/seq-01/frame-000001.pose.txt @@ -0,0 +1,4 @@ + 3.18994000e-01 3.09870000e-01 -8.95670000e-01 1.96907500e+00 +-8.40980000e-02 -9.32059000e-01 -3.52412000e-01 1.12539500e+00 +-9.44019000e-01 1.87741000e-01 -2.71262000e-01 3.24652000e-01 + 0.00000000e+00 0.00000000e+00 0.00000000e+00 1.00000000e+00 diff --git a/nvblox/tests/data/3dmatch/seq-01/frame-000002.color.png b/nvblox/tests/data/3dmatch/seq-01/frame-000002.color.png new file mode 100644 index 00000000..4a812643 Binary files /dev/null and b/nvblox/tests/data/3dmatch/seq-01/frame-000002.color.png differ diff --git a/nvblox/tests/data/3dmatch/seq-01/frame-000002.depth.png b/nvblox/tests/data/3dmatch/seq-01/frame-000002.depth.png new file mode 100644 index 00000000..621162ae Binary files /dev/null and b/nvblox/tests/data/3dmatch/seq-01/frame-000002.depth.png differ diff --git a/nvblox/tests/data/3dmatch/seq-01/frame-000002.pose.txt b/nvblox/tests/data/3dmatch/seq-01/frame-000002.pose.txt new file mode 100644 index 00000000..890e7d6e --- /dev/null +++ b/nvblox/tests/data/3dmatch/seq-01/frame-000002.pose.txt @@ -0,0 +1,4 @@ + 3.24792000e-01 3.10273000e-01 -8.93444000e-01 1.96510400e+00 +-8.08000000e-02 -9.32101000e-01 -3.53072000e-01 1.12505500e+00 +-9.42328000e-01 1.86865000e-01 -2.77669000e-01 3.39484000e-01 + 0.00000000e+00 0.00000000e+00 0.00000000e+00 1.00000000e+00 diff --git a/nvblox/tests/data/3dmatch/seq-01/frame-000116.color.png b/nvblox/tests/data/3dmatch/seq-01/frame-000116.color.png new file mode 100644 index 00000000..d61f31a2 Binary files /dev/null and b/nvblox/tests/data/3dmatch/seq-01/frame-000116.color.png differ diff --git a/nvblox/tests/data/3dmatch/seq-01/frame-000116.depth.png b/nvblox/tests/data/3dmatch/seq-01/frame-000116.depth.png new file mode 100644 index 00000000..9a152c85 Binary files /dev/null and b/nvblox/tests/data/3dmatch/seq-01/frame-000116.depth.png differ diff --git a/nvblox/tests/data/3dmatch/seq-01/frame-000116.pose.txt b/nvblox/tests/data/3dmatch/seq-01/frame-000116.pose.txt new file mode 100644 index 00000000..e8cdf0fe --- /dev/null +++ b/nvblox/tests/data/3dmatch/seq-01/frame-000116.pose.txt @@ -0,0 +1,4 @@ + 7.66485000e-01 1.65186000e-01 -6.20656000e-01 1.77984700e+00 +-4.74910000e-02 -9.49137000e-01 -3.11260000e-01 1.10068500e+00 +-6.40504000e-01 2.68052000e-01 -7.19655000e-01 7.44543000e-01 + 0.00000000e+00 0.00000000e+00 
0.00000000e+00 1.00000000e+00 diff --git a/nvblox/tests/data/3dmatch/seq-01/frame-000422.color.png b/nvblox/tests/data/3dmatch/seq-01/frame-000422.color.png new file mode 100644 index 00000000..5a5b1aa5 Binary files /dev/null and b/nvblox/tests/data/3dmatch/seq-01/frame-000422.color.png differ diff --git a/nvblox/tests/data/3dmatch/seq-01/frame-000422.depth.png b/nvblox/tests/data/3dmatch/seq-01/frame-000422.depth.png new file mode 100644 index 00000000..e313084b Binary files /dev/null and b/nvblox/tests/data/3dmatch/seq-01/frame-000422.depth.png differ diff --git a/nvblox/tests/data/3dmatch/seq-01/frame-000422.pose.txt b/nvblox/tests/data/3dmatch/seq-01/frame-000422.pose.txt new file mode 100644 index 00000000..f86f5112 --- /dev/null +++ b/nvblox/tests/data/3dmatch/seq-01/frame-000422.pose.txt @@ -0,0 +1,4 @@ + 8.59463000e-01 8.99460000e-02 -5.03222000e-01 6.44752000e-01 + 2.90020000e-02 -9.91393000e-01 -1.27668000e-01 1.10310800e+00 +-5.10374000e-01 9.51310000e-02 -8.54675000e-01 7.29334000e-01 + 0.00000000e+00 0.00000000e+00 0.00000000e+00 1.00000000e+00 diff --git a/nvblox/tests/include/nvblox/tests/blox.h b/nvblox/tests/include/nvblox/tests/blox.h new file mode 100644 index 00000000..58d90428 --- /dev/null +++ b/nvblox/tests/include/nvblox/tests/blox.h @@ -0,0 +1,47 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include "nvblox/core/types.h" +#include "nvblox/core/unified_ptr.h" + +namespace nvblox { + +// Dummy block type that just stores its own index. +struct IndexBlock { + typedef unified_ptr<IndexBlock> Ptr; + typedef unified_ptr<const IndexBlock> ConstPtr; + + Index3D data; + + static Ptr allocate(MemoryType memory_type) { + return make_unified<IndexBlock>(memory_type); + } +}; + +// Dummy block that stores a single bool +struct DummyBlock { + typedef unified_ptr<DummyBlock> Ptr; + typedef unified_ptr<const DummyBlock> ConstPtr; + + bool data = false; + + static Ptr allocate(MemoryType memory_type) { + return make_unified<DummyBlock>(memory_type); + } +}; + +} // namespace nvblox diff --git a/nvblox/tests/include/nvblox/tests/blox_utils.h b/nvblox/tests/include/nvblox/tests/blox_utils.h new file mode 100644 index 00000000..23be14d1 --- /dev/null +++ b/nvblox/tests/include/nvblox/tests/blox_utils.h @@ -0,0 +1,66 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+*/ +#pragma once + +#include "nvblox/core/blox.h" +#include "nvblox/core/common_names.h" + +namespace nvblox { + +template <typename BlockType> +void setBlockBytesConstantOnGPU(int value, BlockType* block_device_ptr) { + cudaMemset(block_device_ptr, value, sizeof(BlockType)); +} + +struct TestVoxel { + static constexpr uint8_t kCPUInitializationValue = 0; + static constexpr uint8_t kGPUInitializationValue = 1; + + uint8_t data = kCPUInitializationValue; +}; + +template <> +inline void VoxelBlock<TestVoxel>::initOnGPU( + VoxelBlock<TestVoxel>* voxel_block_ptr) { + setBlockBytesConstantOnGPU(TestVoxel::kGPUInitializationValue, + voxel_block_ptr); +} + +// A custom (non-voxel) block which forgets to define allocate(), and therefore +// will fail the type-trait check nvblox::traits::has_allocate() +struct TestBlockNoAllocation { + typedef unified_ptr<TestBlockNoAllocation> Ptr; + typedef unified_ptr<const TestBlockNoAllocation> ConstPtr; + + static constexpr uint8_t kCPUInitializationValue = 0; + + uint8_t data = kCPUInitializationValue; +}; + +using TestBlock = VoxelBlock<TestVoxel>; + +namespace test_utils { + +// Fills a TsdfBlock such that the voxels distance and weight values are their +// linear index (as a float) +void setTsdfBlockVoxelsInSequence(TsdfBlock::Ptr block); + +bool checkBlockAllConstant(const TsdfBlock::Ptr block, TsdfVoxel voxel_cpu); +bool checkBlockAllConstant(const TestBlock::Ptr block, TestVoxel voxel_cpu); +bool checkBlockAllConstant(const ColorBlock::Ptr block, ColorVoxel voxel_cpu); + +} // namespace test_utils +} // namespace nvblox diff --git a/nvblox/tests/include/nvblox/tests/gpu_image_routines.h b/nvblox/tests/include/nvblox/tests/gpu_image_routines.h new file mode 100644 index 00000000..c2b9a237 --- /dev/null +++ b/nvblox/tests/include/nvblox/tests/gpu_image_routines.h @@ -0,0 +1,34 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/core/color.h" +#include "nvblox/core/image.h" +#include "nvblox/core/types.h" + +namespace nvblox { +namespace test_utils { + +void setImageConstantOnGpu(const float value, DepthImage* image_ptr); +void setImageConstantOnGpu(const Color value, ColorImage* image_ptr); + +void getDifferenceImageOnGPU(const ColorImage& image_1, + const ColorImage& image_2, + ColorImage* diff_image_ptr); +void getDifferenceImageOnGPU(const DepthImage& image_1, + const DepthImage& image_2, + DepthImage* diff_image_ptr); + +} // namespace test_utils +} // namespace nvblox diff --git a/nvblox/tests/include/nvblox/tests/gpu_indexing.h b/nvblox/tests/include/nvblox/tests/gpu_indexing.h new file mode 100644 index 00000000..0a4a8732 --- /dev/null +++ b/nvblox/tests/include/nvblox/tests/gpu_indexing.h @@ -0,0 +1,31 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/core/types.h" + +namespace nvblox { +namespace test_utils { + +void getBlockAndVoxelIndexFromPositionInLayerOnGPU(const float block_size, + const Vector3f& position, + Index3D* block_idx, + Index3D* voxel_idx); + +void getBlockAndVoxelIndexFromPositionInLayerOnGPU( + const float block_size, const std::vector<Vector3f>& positions, + std::vector<Index3D>* block_indices, std::vector<Index3D>* voxel_indices); + +} // namespace test_utils +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/tests/include/nvblox/tests/gpu_layer_utils.h b/nvblox/tests/include/nvblox/tests/gpu_layer_utils.h new file mode 100644 index 00000000..03b175c8 --- /dev/null +++ b/nvblox/tests/include/nvblox/tests/gpu_layer_utils.h @@ -0,0 +1,36 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include <utility> + +#include <vector> +#include "nvblox/gpu_hash/gpu_layer_view.h" + +namespace nvblox { +namespace test_utils { + +// Returns a vector of flags indicating if the query indices exist. +std::vector<bool> getContainsFlags(const GPULayerView<TsdfBlock>& gpu_layer, + const std::vector<Index3D>& indices); + +// Retrieves Voxels on the GPU and brings them back to CPU. +std::pair<std::vector<TsdfVoxel>, std::vector<bool>> getVoxelsAtPositionsOnGPU( + const GPULayerView<TsdfBlock>& gpu_layer, + const std::vector<Vector3f>& p_L_vec); + +} // namespace test_utils +} // namespace nvblox diff --git a/nvblox/tests/include/nvblox/tests/increment_on_gpu.h b/nvblox/tests/include/nvblox/tests/increment_on_gpu.h new file mode 100644 index 00000000..254477df --- /dev/null +++ b/nvblox/tests/include/nvblox/tests/increment_on_gpu.h @@ -0,0 +1,25 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+*/ + +namespace nvblox { +namespace test_utils { + +void incrementOnGPU(int* number); + +void incrementOnGPU(const int num_elements, int* number); + +} // namespace test_utils +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/tests/include/nvblox/tests/interpolation_2d_gpu.h b/nvblox/tests/include/nvblox/tests/interpolation_2d_gpu.h new file mode 100644 index 00000000..7a032c2b --- /dev/null +++ b/nvblox/tests/include/nvblox/tests/interpolation_2d_gpu.h @@ -0,0 +1,35 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include "nvblox/core/image.h" +#include "nvblox/core/types.h" + +namespace nvblox { +namespace test_utils { + +void linearInterpolateImageGpu(const DepthImage& image, + const std::vector<Vector2f>& u_px_vec, + std::vector<float>* values_ptr, + std::vector<int>* success_flags_ptr); + +void linearInterpolateImageGpu(const ColorImage& image, + const std::vector<Vector2f>& u_px_vec, + std::vector<Color>* values_ptr, + std::vector<int>* success_flags_ptr); + +} // namespace test_utils +} // namespace nvblox diff --git a/nvblox/tests/include/nvblox/tests/projective_tsdf_integrator_cpu.h b/nvblox/tests/include/nvblox/tests/projective_tsdf_integrator_cpu.h new file mode 100644 index 00000000..0cf54e89 --- /dev/null +++ b/nvblox/tests/include/nvblox/tests/projective_tsdf_integrator_cpu.h @@ -0,0 +1,37 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/core/common_names.h" +#include "nvblox/core/image.h" +#include "nvblox/core/layer.h" +#include "nvblox/core/types.h" +#include "nvblox/integrators/projective_tsdf_integrator.h" + +namespace nvblox { + +class ProjectiveTsdfIntegratorCPU : public ProjectiveTsdfIntegrator { + public: + ProjectiveTsdfIntegratorCPU() : ProjectiveTsdfIntegrator() {} + virtual ~ProjectiveTsdfIntegratorCPU() {} + + protected: + // We override the GPU version of this function and run it on the CPU.
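+ // (Presumably this exists so the tests can compare the GPU integrator's
+ // output against a simple serial reference implementation.)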
+ void updateBlocks(const std::vector<Index3D>& block_indices, + const DepthImage& depth_frame, const Transform& T_L_C, + const Camera& camera, const float truncation_distance_m, + TsdfLayer* layer) override; +}; + +} // namespace nvblox diff --git a/nvblox/tests/include/nvblox/tests/projective_tsdf_integrator_cuda_components.h b/nvblox/tests/include/nvblox/tests/projective_tsdf_integrator_cuda_components.h new file mode 100644 index 00000000..5c04ae01 --- /dev/null +++ b/nvblox/tests/include/nvblox/tests/projective_tsdf_integrator_cuda_components.h @@ -0,0 +1,50 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include "nvblox/core/blox.h" +#include "nvblox/core/camera.h" +#include "nvblox/core/image.h" +#include "nvblox/core/layer.h" +#include "nvblox/core/types.h" +#include "nvblox/core/voxels.h" + +namespace nvblox { +namespace test_utils { + +// An Nx2 matrix representing the results of projecting all voxels in a block +// into the image plane +using BlockProjectionResult = + Eigen::Matrix<float, VoxelBlock<TsdfVoxel>::kVoxelsPerSide * + VoxelBlock<TsdfVoxel>::kVoxelsPerSide * + VoxelBlock<TsdfVoxel>::kVoxelsPerSide, + 2>; + +Eigen::Matrix3Xf transformPointsOnGPU(const Transform& T_B_A, + const Eigen::Matrix3Xf& vecs_A); + +std::vector<BlockProjectionResult> projectBlocksOnGPU( + const std::vector<Index3D>& block_indices, const Camera& camera, + const Transform& T_C_L, TsdfLayer* distance_layer_ptr); + +Eigen::VectorXf interpolatePointsOnGPU(const DepthImage& depth_frame, + const Eigen::MatrixX2f& u_px_vec); + +void setVoxelBlockOnGPU(TsdfLayer* layer); + +} // namespace test_utils +} // namespace nvblox diff --git a/nvblox/tests/include/nvblox/tests/test_utils_cuda.h b/nvblox/tests/include/nvblox/tests/test_utils_cuda.h new file mode 100644 index 00000000..f362bd8c --- /dev/null +++ b/nvblox/tests/include/nvblox/tests/test_utils_cuda.h @@ -0,0 +1,39 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+*/ +#include "nvblox/core/blox.h" +#include "nvblox/core/common_names.h" +#include "nvblox/core/types.h" +#include "nvblox/core/unified_vector.h" + +namespace nvblox { +namespace test_utils { + +void fillVectorWithConstant(float value, unified_vector* vec_ptr); +void fillVectorWithConstant(int value, unified_vector* vec_ptr); + +void fillWithConstant(float value, size_t num_elems, float* vec_ptr); +void fillWithConstant(int value, size_t num_elems, int* vec_ptr); + +bool checkVectorAllConstant(const unified_vector& vec_ptr, float value); +bool checkVectorAllConstant(const unified_vector& vec_ptr, int value); + +bool checkAllConstant(const float* vec_ptr, float value, size_t num_elems); +bool checkAllConstant(const int* vec_ptr, int value, size_t num_elems); + +void addOneToAllGPU(unified_vector* vec_ptr); + +} // namespace test_utils +} // namespace nvblox diff --git a/nvblox/tests/include/nvblox/tests/utils.h b/nvblox/tests/include/nvblox/tests/utils.h new file mode 100644 index 00000000..2f58ba14 --- /dev/null +++ b/nvblox/tests/include/nvblox/tests/utils.h @@ -0,0 +1,44 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#pragma once + +#include "nvblox/core/blox.h" +#include "nvblox/core/color.h" +#include "nvblox/core/layer.h" +#include "nvblox/core/types.h" +#include "nvblox/core/voxels.h" + +namespace nvblox { +namespace test_utils { + +float randomFloatInRange(float f_min, float f_max); + +float randomIntInRange(int i_min, int i_max); + +float randomSign(); + +Index3D getRandomIndex3dInRange(const int min, const int max); + +Vector3f getRandomVector3fInRange(const float min, const float max); + +Vector3f getRandomVector3fInRange(const Vector3f& min, const Vector3f& max); + +Vector3f getRandomUnitVector3f(); + +Color randomColor(); + +} // namespace test_utils +} // namespace nvblox diff --git a/nvblox/tests/lib/cuda/blox.cu b/nvblox/tests/lib/cuda/blox.cu new file mode 100644 index 00000000..2bf085bf --- /dev/null +++ b/nvblox/tests/lib/cuda/blox.cu @@ -0,0 +1,29 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#include "nvblox/tests/blox.h" + +#include "nvblox/core/layer.h" +#include "nvblox/gpu_hash/cuda/impl/gpu_layer_view_impl.cuh" + +namespace nvblox { + +using IndexBlockLayer = BlockLayer; +template class GPULayerView; + +using DummyBlockLayer = BlockLayer; +template class GPULayerView; + +} // nvblox diff --git a/nvblox/tests/lib/cuda/blox_utils.cu b/nvblox/tests/lib/cuda/blox_utils.cu new file mode 100644 index 00000000..ea537074 --- /dev/null +++ b/nvblox/tests/lib/cuda/blox_utils.cu @@ -0,0 +1,140 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/tests/blox_utils.h" + +namespace nvblox { + +// The requirement for this second definition of this variable here is a +// language issue with c++14. Can be removed in c++17. +constexpr uint8_t TestBlockNoAllocation::kCPUInitializationValue; + +namespace test_utils { + +__global__ void setTsdfBlockVoxelsInSequenceKernel(TsdfBlock* block) { + const int lin_idx = + threadIdx.x + TsdfBlock::kVoxelsPerSide * + (threadIdx.y + TsdfBlock::kVoxelsPerSide * threadIdx.z); + TsdfVoxel* voxel_ptr = &block->voxels[threadIdx.z][threadIdx.y][threadIdx.x]; + voxel_ptr->distance = static_cast(lin_idx); + voxel_ptr->weight = static_cast(lin_idx); +} + +void setTsdfBlockVoxelsInSequence(TsdfBlock::Ptr block) { + CHECK(block.memory_type() != MemoryType::kHost); + constexpr int kNumBlocks = 1; + constexpr int kVoxelsPerSide = VoxelBlock::kVoxelsPerSide; + const dim3 kThreadsPerBlock(kVoxelsPerSide, kVoxelsPerSide, kVoxelsPerSide); + setTsdfBlockVoxelsInSequenceKernel<<>>( + block.get()); +} + +__global__ void checkBlockAllConstant(const TsdfBlock* block, + const TsdfVoxel* voxel_constant, + bool* flag) { + if ((blockIdx.x == 0) && (threadIdx.x == 0)) { + *flag = true; + } + __syncthreads(); + const TsdfVoxel voxel = block->voxels[threadIdx.z][threadIdx.y][threadIdx.x]; + const float distance_diff = + std::abs(voxel.distance - voxel_constant->distance); + const float weight_diff = std::abs(voxel.weight - voxel_constant->weight); + constexpr float eps = 1e-4; + if ((distance_diff > eps) || (weight_diff > eps)) { + *flag = false; + } +} + +__global__ void checkBlockAllConstant(const TestBlock* block, + const TestVoxel* voxel_constant, + bool* flag) { + if ((blockIdx.x == 0) && (threadIdx.x == 0)) { + *flag = true; + } + __syncthreads(); + const TestVoxel voxel = block->voxels[threadIdx.z][threadIdx.y][threadIdx.x]; + if (voxel.data != voxel_constant->data) { + *flag = false; + } +} + +__global__ void checkBlockAllConstant(const ColorBlock* block, + const ColorVoxel* voxel_constant, + bool* flag) { + if ((blockIdx.x == 0) && (threadIdx.x == 0)) { + *flag = true; + } + __syncthreads(); + const ColorVoxel voxel = block->voxels[threadIdx.z][threadIdx.y][threadIdx.x]; + if ((voxel.color.r != voxel_constant->color.r) || + (voxel.color.g != voxel_constant->color.g) || + (voxel.color.b != voxel_constant->color.b)) { + *flag = false; + } + const float weight_diff = std::abs(voxel.weight - voxel_constant->weight); + constexpr float eps 
= 1e-4; + if (weight_diff > eps) { + *flag = false; + } +} + +template <typename VoxelType> +bool checkBlockAllConstantTemplate( + const typename VoxelBlock<VoxelType>::Ptr block, VoxelType voxel_cpu) { + // Allocate memory for the flag + bool* flag_device_ptr; + checkCudaErrors(cudaMalloc(&flag_device_ptr, sizeof(bool))); + + // Transfer the CPU voxel to GPU + VoxelType* voxel_device_ptr; + checkCudaErrors(cudaMalloc(&voxel_device_ptr, sizeof(VoxelType))); + checkCudaErrors(cudaMemcpy(voxel_device_ptr, &voxel_cpu, sizeof(VoxelType), + cudaMemcpyHostToDevice)); + + constexpr int kNumBlocks = 1; + constexpr int kVoxelsPerSide = VoxelBlock<VoxelType>::kVoxelsPerSide; + const dim3 kThreadsPerBlock(kVoxelsPerSide, kVoxelsPerSide, kVoxelsPerSide); + checkBlockAllConstant<<<kNumBlocks, kThreadsPerBlock>>>( + block.get(), voxel_device_ptr, flag_device_ptr); + checkCudaErrors(cudaDeviceSynchronize()); + checkCudaErrors(cudaPeekAtLastError()); + + // Copy the flag back + bool flag; + checkCudaErrors( + cudaMemcpy(&flag, flag_device_ptr, sizeof(bool), cudaMemcpyDeviceToHost)); + + // Free the flag + checkCudaErrors(cudaFree(flag_device_ptr)); + checkCudaErrors(cudaFree(voxel_device_ptr)); + + return flag; +} + +bool checkBlockAllConstant(const TsdfBlock::Ptr block, TsdfVoxel voxel_cpu) { + return checkBlockAllConstantTemplate(block, voxel_cpu); +} + +bool checkBlockAllConstant(const TestBlock::Ptr block, TestVoxel voxel_cpu) { + return checkBlockAllConstantTemplate(block, voxel_cpu); +} + +bool checkBlockAllConstant(const ColorBlock::Ptr block, ColorVoxel voxel_cpu) { + return checkBlockAllConstantTemplate(block, voxel_cpu); +} + +} // namespace test_utils +} // namespace nvblox diff --git a/nvblox/tests/lib/cuda/gpu_image_routines.cu b/nvblox/tests/lib/cuda/gpu_image_routines.cu new file mode 100644 index 00000000..8325b83e --- /dev/null +++ b/nvblox/tests/lib/cuda/gpu_image_routines.cu @@ -0,0 +1,126 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/tests/gpu_image_routines.h" + +#include "nvblox/core/cuda/error_check.cuh" +#include "nvblox/core/image.h" + +namespace nvblox { +namespace test_utils { + +template <typename ElementType> +__global__ void setImageConstantKernel(ElementType* image, const int rows, + const int cols, ElementType value) { + // NOTE(alexmillane): Memory access is fully coalesced because neighbouring + // threads in the grid x dimension access neighbouring memory elements + // (row-major images). + const int row_idx = blockIdx.x * blockDim.x + threadIdx.x; + const int col_idx = blockIdx.y * blockDim.y + threadIdx.y; + if (col_idx < cols && row_idx < rows) { + image::access(row_idx, col_idx, cols, image) = value; + } +} + +template <typename ElementType> +void setImageConstantOnGpuTemplate(const ElementType value, + Image<ElementType>* image_ptr) { + // Set the pixels to a constant value.
One thread per pixel. + constexpr int kThreadsPerBlockInEachDimension = 8; + dim3 blockShape(kThreadsPerBlockInEachDimension, + kThreadsPerBlockInEachDimension); + dim3 gridShape((image_ptr->rows() / kThreadsPerBlockInEachDimension) + 1, + (image_ptr->cols() / kThreadsPerBlockInEachDimension) + 1); + setImageConstantKernel<<<gridShape, blockShape>>>( + image_ptr->dataPtr(), image_ptr->rows(), image_ptr->cols(), value); + checkCudaErrors(cudaGetLastError()); + checkCudaErrors(cudaDeviceSynchronize()); +} + +void setImageConstantOnGpu(const float value, DepthImage* image_ptr) { + setImageConstantOnGpuTemplate(value, image_ptr); +} + +void setImageConstantOnGpu(const Color value, ColorImage* image_ptr) { + setImageConstantOnGpuTemplate(value, image_ptr); +} + +__device__ Color diff(const Color& color_1, const Color& color_2) { + return Color(static_cast<uint8_t>(std::abs(static_cast<int>(color_1.r) - + static_cast<int>(color_2.r))), + static_cast<uint8_t>(std::abs(static_cast<int>(color_1.g) - + static_cast<int>(color_2.g))), + static_cast<uint8_t>(std::abs(static_cast<int>(color_1.b) - + static_cast<int>(color_2.b)))); +} + +__device__ float diff(const float& depth_1, const float& depth_2) { + return fabsf(depth_1 - depth_2); +} + +template <typename ElementType> +__global__ void differenceImageKernel(ElementType* diff_image_ptr, + const int rows, const int cols, + const ElementType* image_1, + const ElementType* image_2) { + // NOTE(alexmillane): Memory access is fully coalesced because neighbouring + // threads in the grid x dimension access neighbouring memory elements + // (row-major images). + const int row_idx = blockIdx.x * blockDim.x + threadIdx.x; + const int col_idx = blockIdx.y * blockDim.y + threadIdx.y; + if (col_idx < cols && row_idx < rows) { + const ElementType color_1 = image::access(row_idx, col_idx, cols, image_1); + const ElementType color_2 = image::access(row_idx, col_idx, cols, image_2); + const ElementType abs_color_diff = diff(color_1, color_2); + image::access(row_idx, col_idx, cols, diff_image_ptr) = abs_color_diff; + } +} + +template <typename ImageType> +void getDifferenceImageTemplate(const ImageType& image_1, + const ImageType& image_2, + ImageType* diff_image_ptr) { + CHECK_EQ(image_1.rows(), image_2.rows()); + CHECK_EQ(image_1.cols(), image_2.cols()); + CHECK(image_1.memory_type() == image_2.memory_type()); + CHECK_NOTNULL(diff_image_ptr); + // Create the image for output + *diff_image_ptr = + ImageType(image_1.rows(), image_1.cols(), image_1.memory_type()); + // Compute the difference image.
One thread per pixel. + constexpr int kThreadsPerBlockInEachDimension = 8; + dim3 blockShape(kThreadsPerBlockInEachDimension, + kThreadsPerBlockInEachDimension); + dim3 gridShape((image_1.rows() / kThreadsPerBlockInEachDimension) + 1, + (image_1.cols() / kThreadsPerBlockInEachDimension) + 1); + differenceImageKernel<<<gridShape, blockShape>>>( + diff_image_ptr->dataPtr(), image_1.rows(), image_1.cols(), + image_1.dataConstPtr(), image_2.dataConstPtr()); + checkCudaErrors(cudaGetLastError()); + checkCudaErrors(cudaDeviceSynchronize()); +} + +void getDifferenceImageOnGPU(const ColorImage& image_1, const ColorImage& image_2, + ColorImage* diff_image_ptr) { + getDifferenceImageTemplate(image_1, image_2, diff_image_ptr); +} + +void getDifferenceImageOnGPU(const DepthImage& image_1, const DepthImage& image_2, + DepthImage* diff_image_ptr) { + getDifferenceImageTemplate(image_1, image_2, diff_image_ptr); +} + +} // namespace test_utils +} // namespace nvblox diff --git a/nvblox/tests/lib/cuda/gpu_indexing.cu b/nvblox/tests/lib/cuda/gpu_indexing.cu new file mode 100644 index 00000000..d3225c17 --- /dev/null +++ b/nvblox/tests/lib/cuda/gpu_indexing.cu @@ -0,0 +1,84 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/tests/gpu_indexing.h" + +#include <vector> + +#include "nvblox/core/indexing.h" +#include "nvblox/core/unified_vector.h" + +namespace nvblox { +namespace test_utils { + +__global__ void getBlockAndVoxelIndexFromPositionInLayerKernel( + const Vector3f position, const float block_size, Index3D* block_idx, + Index3D* voxel_idx) { + getBlockAndVoxelIndexFromPositionInLayer(block_size, position, block_idx, + voxel_idx); +} + +__global__ void getBlockAndVoxelIndexFromPositionInLayerKernel( + const Vector3f* positions, const float block_size, const int num_positions, + Index3D* block_indices, Index3D* voxel_indices) { + const int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num_positions) { + getBlockAndVoxelIndexFromPositionInLayer( + block_size, positions[idx], &block_indices[idx], &voxel_indices[idx]); + } +} + +void getBlockAndVoxelIndexFromPositionInLayerOnGPU(const float block_size, + const Vector3f& position, + Index3D* block_idx, + Index3D* voxel_idx) { + Index3D* block_idx_device; + Index3D* voxel_idx_device; + checkCudaErrors(cudaMalloc(&block_idx_device, sizeof(Index3D))); + checkCudaErrors(cudaMalloc(&voxel_idx_device, sizeof(Index3D))); + + getBlockAndVoxelIndexFromPositionInLayerKernel<<<1, 1>>>( + position, block_size, block_idx_device, voxel_idx_device); + + checkCudaErrors(cudaMemcpy(block_idx, block_idx_device, sizeof(Index3D), + cudaMemcpyDeviceToHost)); + checkCudaErrors(cudaMemcpy(voxel_idx, voxel_idx_device, sizeof(Index3D), + cudaMemcpyDeviceToHost)); + + checkCudaErrors(cudaFree(block_idx_device)); + checkCudaErrors(cudaFree(voxel_idx_device)); +} + +void getBlockAndVoxelIndexFromPositionInLayerOnGPU( + const float block_size, const std::vector<Vector3f>& positions, + std::vector<Index3D>* block_indices, std::vector<Index3D>* voxel_indices) { + device_vector<Vector3f>
positions_device(positions); + + device_vector<Index3D> block_indices_device(positions.size()); + device_vector<Index3D> voxel_indices_device(positions.size()); + + constexpr int kNumThreads = 1024; + const int kNumBlocks = positions.size() / kNumThreads + 1; + + getBlockAndVoxelIndexFromPositionInLayerKernel<<<kNumBlocks, kNumThreads>>>( + positions_device.data(), block_size, positions_device.size(), + block_indices_device.data(), voxel_indices_device.data()); + + *block_indices = block_indices_device.toVector(); + *voxel_indices = voxel_indices_device.toVector(); +} + +} // namespace test_utils +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/tests/lib/cuda/gpu_layer_utils.cu b/nvblox/tests/lib/cuda/gpu_layer_utils.cu new file mode 100644 index 00000000..37648b80 --- /dev/null +++ b/nvblox/tests/lib/cuda/gpu_layer_utils.cu @@ -0,0 +1,108 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/tests/gpu_layer_utils.h" + +#include <thrust/device_vector.h> + +#include "nvblox/gpu_hash/cuda/gpu_hash_interface.cuh" +#include "nvblox/gpu_hash/cuda/gpu_indexing.cuh" + +namespace nvblox { +namespace test_utils { + +__global__ void getContainsFlagsKernel( + Index3DDeviceHashMapType<TsdfBlock> block_hash, Index3D* indices, + int num_indices, bool* flags) { + int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < num_indices) { + flags[idx] = block_hash.contains(indices[idx]); + } +} + +std::vector<bool> getContainsFlags( + const GPULayerView<TsdfBlock>& gpu_layer, + const std::vector<Index3D>& indices) { + // CPU -> GPU + thrust::device_vector<Index3D> device_indices(indices); + + // Output space + thrust::device_vector<bool> device_flags(device_indices.size()); + + // Kernel + constexpr int kNumThreadsPerBlock = 32; + const int num_blocks = device_indices.size() / kNumThreadsPerBlock + 1; + getContainsFlagsKernel<<<num_blocks, kNumThreadsPerBlock>>>( + gpu_layer.getHash().impl_, device_indices.data().get(), + device_indices.size(), device_flags.data().get()); + checkCudaErrors(cudaDeviceSynchronize()); + checkCudaErrors(cudaPeekAtLastError()); + + // GPU -> CPU + std::vector<bool> host_flags(device_flags.size()); + thrust::copy(device_flags.begin(), device_flags.end(), host_flags.begin()); + + return host_flags; +} + +__global__ void getVoxelsAtPositionsKernel( + Index3DDeviceHashMapType<TsdfBlock> block_hash, const Vector3f* p_L_vec, + float block_size, int num_points, TsdfVoxel* voxels, bool* flags) { + const int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < num_points) { + const Vector3f p_L = p_L_vec[idx]; + TsdfVoxel* voxel_ptr; + const bool flag = + getVoxelAtPosition(block_hash, p_L, block_size, &voxel_ptr); + flags[idx] = flag; + if (flag) { + voxels[idx] = *voxel_ptr; + } + } +} + +std::pair<std::vector<TsdfVoxel>, std::vector<bool>> getVoxelsAtPositionsOnGPU( + const GPULayerView<TsdfBlock>& gpu_layer, + const std::vector<Vector3f>& p_L_vec) { + // CPU -> GPU + thrust::device_vector<Vector3f> device_positions(p_L_vec); + + // Output space + thrust::device_vector<TsdfVoxel> device_voxels(device_positions.size()); + thrust::device_vector<bool> device_flags(device_positions.size()); + + // Kernel + constexpr int kNumThreadsPerBlock = 32; + const int num_blocks =
device_positions.size() / kNumThreadsPerBlock + 1; + getVoxelsAtPositionsKernel<<<num_blocks, kNumThreadsPerBlock>>>( + gpu_layer.getHash().impl_, device_positions.data().get(), + gpu_layer.block_size(), device_positions.size(), + device_voxels.data().get(), device_flags.data().get()); + checkCudaErrors(cudaDeviceSynchronize()); + checkCudaErrors(cudaPeekAtLastError()); + + // GPU -> CPU + std::vector<TsdfVoxel> host_voxels(device_voxels.size()); + thrust::copy(device_voxels.begin(), device_voxels.end(), host_voxels.begin()); + std::vector<bool> host_flags(device_flags.size()); + thrust::copy(device_flags.begin(), device_flags.end(), host_flags.begin()); + checkCudaErrors(cudaDeviceSynchronize()); + checkCudaErrors(cudaPeekAtLastError()); + + return {std::move(host_voxels), std::move(host_flags)}; +} + +} // namespace test_utils +} // namespace nvblox diff --git a/nvblox/tests/lib/cuda/increment_kernel.cu b/nvblox/tests/lib/cuda/increment_kernel.cu new file mode 100644 index 00000000..c8b86bb2 --- /dev/null +++ b/nvblox/tests/lib/cuda/increment_kernel.cu @@ -0,0 +1,48 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/tests/increment_on_gpu.h" + +__global__ void incrementKernel(int* number) { + if (threadIdx.x == 0 && blockIdx.x == 0) { + (*number)++; + } +} + +__global__ void incrementKernel(int* number, const int num_elements) { + const int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num_elements) { + number[idx]++; + } +} + + +namespace nvblox { +namespace test_utils { + +void incrementOnGPU(int* number) { + incrementKernel<<<1, 1>>>(number); + cudaDeviceSynchronize(); +} + +void incrementOnGPU(const int num_elements, int* number) { + constexpr int kThreadsPerBlock = 32; + const int num_blocks = (num_elements / kThreadsPerBlock) + 1; + incrementKernel<<<num_blocks, kThreadsPerBlock>>>(number, num_elements); + cudaDeviceSynchronize(); +} + +} // namespace test_utils +} // namespace nvblox \ No newline at end of file diff --git a/nvblox/tests/lib/cuda/interpolation_2d_gpu.cu b/nvblox/tests/lib/cuda/interpolation_2d_gpu.cu new file mode 100644 index 00000000..6ee6d960 --- /dev/null +++ b/nvblox/tests/lib/cuda/interpolation_2d_gpu.cu @@ -0,0 +1,100 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+*/ +#include "nvblox/tests/interpolation_2d_gpu.h" + +#include "nvblox/core/interpolation_2d.h" + +namespace nvblox { +namespace test_utils { + +template +__global__ void interpolate(const ElementType* image_unified_ptr, int rows, + int cols, const Vector2f* u_px_vec, + ElementType* values_ptr, int* success_flags, + int num_points) { + const int lin_idx = threadIdx.x + blockIdx.x * blockDim.x; + if (lin_idx < num_points) { + ElementType value; + if (interpolation::interpolate2DLinear(image_unified_ptr, u_px_vec[lin_idx], + rows, cols, &value)) { + success_flags[lin_idx] = 1; + values_ptr[lin_idx] = value; + } else { + success_flags[lin_idx] = 0; + } + } +} + +template +void linearInterpolateImageGpuTemplate(const Image& image, + const std::vector& u_px_vec, + std::vector* values_ptr, + std::vector* success_flags_ptr) { + CHECK_NOTNULL(values_ptr); + CHECK_NOTNULL(success_flags_ptr); + CHECK_EQ(values_ptr->size(), u_px_vec.size()); + CHECK_EQ(success_flags_ptr->size(), u_px_vec.size()); + // Move the data to the GPU + const int num_points = u_px_vec.size(); + Vector2f* u_px_vec_device_ptr; + ElementType* values_device_ptr; + int* success_flags_device_ptr; + checkCudaErrors( + cudaMalloc(&u_px_vec_device_ptr, num_points * sizeof(Eigen::Vector2f))); + checkCudaErrors( + cudaMalloc(&values_device_ptr, num_points * sizeof(ElementType))); + checkCudaErrors( + cudaMalloc(&success_flags_device_ptr, num_points * sizeof(int))); + checkCudaErrors(cudaMemcpy(u_px_vec_device_ptr, u_px_vec.data(), + num_points * sizeof(Eigen::Vector2f), + cudaMemcpyHostToDevice)); + + // Interpolate on the GPUUUU + constexpr int kThreadsPerBlock = 32; + const int num_blocks = static_cast(num_points / kThreadsPerBlock) + 1; + interpolate<<>>( + image.dataConstPtr(), image.rows(), image.cols(), u_px_vec_device_ptr, + values_device_ptr, success_flags_device_ptr, num_points); + checkCudaErrors(cudaPeekAtLastError()); + // Get results back + checkCudaErrors(cudaMemcpy(values_ptr->data(), values_device_ptr, + num_points * sizeof(ElementType), + cudaMemcpyDeviceToHost)); + checkCudaErrors(cudaMemcpy(success_flags_ptr->data(), + success_flags_device_ptr, num_points * sizeof(int), + cudaMemcpyDeviceToHost)); + cudaFree(values_device_ptr); + cudaFree(success_flags_device_ptr); +} + +void linearInterpolateImageGpu(const DepthImage& image, + const std::vector& u_px_vec, + std::vector* values_ptr, + std::vector* success_flags_ptr) { + linearInterpolateImageGpuTemplate(image, u_px_vec, values_ptr, + success_flags_ptr); +} + +void linearInterpolateImageGpu(const ColorImage& image, + const std::vector& u_px_vec, + std::vector* values_ptr, + std::vector* success_flags_ptr) { + linearInterpolateImageGpuTemplate(image, u_px_vec, values_ptr, + success_flags_ptr); +} + +} // namespace test_utils +} // namespace nvblox diff --git a/nvblox/tests/lib/cuda/projective_tsdf_integrator_cuda_components.cu b/nvblox/tests/lib/cuda/projective_tsdf_integrator_cuda_components.cu new file mode 100644 index 00000000..a293cc53 --- /dev/null +++ b/nvblox/tests/lib/cuda/projective_tsdf_integrator_cuda_components.cu @@ -0,0 +1,319 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "nvblox/tests/projective_tsdf_integrator_cuda_components.h" + +#include "nvblox/core/blox.h" +#include "nvblox/core/cuda/error_check.cuh" +#include "nvblox/core/interpolation_2d.h" + +namespace nvblox { +namespace test_utils { + +__global__ void transformPointsOnGPU(const Eigen::Matrix3f* R_B_A_matrix_ptr, + const Eigen::Vector3f* t_B_A_matrix_ptr, + const float* vecs_A_ptr, + const int num_vecs, float* vecs_B_ptr) { + // We first load the transform into shared memory for use by all threads in + // the block. The rotation matrix and translation vector have 9 + 3 = 12 + // elements, so the first 12 threads of each block perform the load. + __shared__ Eigen::Matrix3f R_B_A; + if (threadIdx.x < 9) { + R_B_A.data()[threadIdx.x] = R_B_A_matrix_ptr->data()[threadIdx.x]; + } + __shared__ Eigen::Vector3f t_B_A; + if (threadIdx.x >= 9 && threadIdx.x < 12) { + t_B_A.data()[threadIdx.x - 9] = t_B_A_matrix_ptr->data()[threadIdx.x - 9]; + } + __syncthreads(); + + // Now perform transformation of the vectors. + const int vec_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (vec_idx < num_vecs) { + // Mapping the vecs + const Eigen::Map<const Eigen::Matrix3Xf> vecs_A(vecs_A_ptr, 3, num_vecs); + Eigen::Map<Eigen::Matrix3Xf> vecs_B(vecs_B_ptr, 3, num_vecs); + // Transformation + vecs_B.col(vec_idx) = R_B_A * vecs_A.col(vec_idx) + t_B_A; + } +} + +__global__ void projectBlocksToCamera( + const Index3D* block_indices_device_ptr, const Camera* camera_device_ptr, + const Eigen::Matrix3f* R_C_L_device_ptr, + const Eigen::Vector3f* t_C_L_device_ptr, const float block_size, + BlockProjectionResult* block_projection_results_device_ptr) { + // Linear index of thread within block + const int thread_index_linear = + threadIdx.z + blockDim.z * (threadIdx.y + (blockDim.y * threadIdx.x)); + + // Get data needed by all threads into shared memory + __shared__ Eigen::Matrix3f R_C_L; + if (thread_index_linear < 9) { + R_C_L.data()[thread_index_linear] = + R_C_L_device_ptr->data()[thread_index_linear]; + } + __shared__ Eigen::Vector3f t_C_L; + if (thread_index_linear >= 9 && thread_index_linear < 12) { + t_C_L.data()[thread_index_linear - 9] = + t_C_L_device_ptr->data()[thread_index_linear - 9]; + } + __syncthreads(); + + // The indices of the voxel this thread will work on + // blockIdx.x - The index of the block we're working on (blockIdx.y/z + // should be zero) + // threadIdx.x/y/z - The indices of the voxel within the block (we + // expect the threadBlockDims == voxelBlockDims) + const Index3D block_idx = block_indices_device_ptr[blockIdx.x]; + const Index3D voxel_idx(threadIdx.x, threadIdx.y, threadIdx.z); + + // Voxel center point + const Vector3f p_voxel_center_L = getCenterPostionFromBlockIndexAndVoxelIndex( + block_size, block_idx, voxel_idx); + // To camera frame + const Vector3f p_voxel_center_C = R_C_L * p_voxel_center_L + t_C_L; + // Project to image plane + Eigen::Vector2f u_px; + if (!camera_device_ptr->project(p_voxel_center_C, &u_px)) { + return; + } + + // Map outputs + BlockProjectionResult* result_ptr = + &(block_projection_results_device_ptr[blockIdx.x]); + result_ptr->row(thread_index_linear) = u_px; +} + +__global__ void interpolate(const float*
depth_frame, + const float* u_px_vec_device_ptr, const int rows, + const int cols, const int num_points, + float* interpolated_value_device_ptr) { + // Map the interpolation locations + const Eigen::Map<const Eigen::MatrixX2f> u_px_vec(u_px_vec_device_ptr, + num_points, 2); + // Interpolate one of the points + const int u_px_vec_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (u_px_vec_idx < num_points) { + interpolation::interpolate2DLinear( + depth_frame, u_px_vec.row(u_px_vec_idx), rows, cols, + &interpolated_value_device_ptr[u_px_vec_idx]); + } +} + +__global__ void setVoxelBlock(VoxelBlock<TsdfVoxel>** block_device_ptrs) { + // The VoxelBlock that this ThreadBlock is working on + VoxelBlock<TsdfVoxel>* block_ptr = block_device_ptrs[blockIdx.x]; + block_ptr->voxels[threadIdx.z][threadIdx.y][threadIdx.x].distance = + threadIdx.x + blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z); +} + +void setVoxelBlockOnGPU(TsdfLayer* layer) { + // Get a list of blocks to be modified on the CPU + const std::vector<Index3D> block_indices = layer->getAllBlockIndices(); + std::vector<VoxelBlock<TsdfVoxel>*> block_ptrs; + block_ptrs.reserve(block_indices.size()); + for (const Index3D& block_index : block_indices) { + block_ptrs.push_back(layer->getBlockAtIndex(block_index).get()); + } + + // Move the list to the GPU + VoxelBlock<TsdfVoxel>** block_device_ptrs; + checkCudaErrors( + cudaMalloc(&block_device_ptrs, + block_ptrs.size() * sizeof(VoxelBlock<TsdfVoxel>*))); + checkCudaErrors( + cudaMemcpy(block_device_ptrs, block_ptrs.data(), + block_ptrs.size() * sizeof(VoxelBlock<TsdfVoxel>*), + cudaMemcpyHostToDevice)); + + // Kernel - one thread block per voxel block + constexpr int kVoxelsPerSide = VoxelBlock<TsdfVoxel>::kVoxelsPerSide; + const dim3 kThreadsPerBlock(kVoxelsPerSide, kVoxelsPerSide, kVoxelsPerSide); + const int num_blocks = block_indices.size(); + setVoxelBlock<<<num_blocks, kThreadsPerBlock>>>(block_device_ptrs); + checkCudaErrors(cudaGetLastError()); + checkCudaErrors(cudaDeviceSynchronize()); + + cudaFree(block_device_ptrs); +} + +Eigen::VectorXf interpolatePointsOnGPU(const DepthImage& depth_frame, + const Eigen::MatrixX2f& u_px_vec) { + // Transfer data to the GPU + float* depth_frame_device_ptr; + checkCudaErrors( + cudaMalloc(&depth_frame_device_ptr, depth_frame.numel() * sizeof(float))); + checkCudaErrors(cudaMemcpy(depth_frame_device_ptr, depth_frame.dataConstPtr(), + depth_frame.numel() * sizeof(float), + cudaMemcpyHostToDevice)); + float* u_px_vec_device_ptr; + checkCudaErrors( + cudaMalloc(&u_px_vec_device_ptr, u_px_vec.rows() * 2 * sizeof(float))); + checkCudaErrors(cudaMemcpy(u_px_vec_device_ptr, u_px_vec.data(), + u_px_vec.rows() * 2 * sizeof(float), + cudaMemcpyHostToDevice)); + + // Output location + float* interpolated_values_device_ptr; + checkCudaErrors(cudaMalloc(&interpolated_values_device_ptr, + u_px_vec.rows() * sizeof(float))); + + // Kernel - interpolation + const int num_points = u_px_vec.rows(); + constexpr int threadsPerBlock = 512; + const int blocksInGrid = (num_points / threadsPerBlock) + 1; + interpolate<<<blocksInGrid, threadsPerBlock>>>( + depth_frame_device_ptr, u_px_vec_device_ptr, depth_frame.rows(), depth_frame.cols(), + u_px_vec.rows(), interpolated_values_device_ptr); + checkCudaErrors(cudaGetLastError()); + checkCudaErrors(cudaDeviceSynchronize()); + + // Return the result + Eigen::VectorXf results(u_px_vec.rows()); + checkCudaErrors(cudaMemcpy(results.data(), interpolated_values_device_ptr, + u_px_vec.rows() * sizeof(float), + cudaMemcpyDeviceToHost)); + + cudaFree(depth_frame_device_ptr); + cudaFree(u_px_vec_device_ptr); + cudaFree(interpolated_values_device_ptr); + + return results; +} + +std::vector<BlockProjectionResult> projectBlocksOnGPU(
+
+std::vector<BlockProjectionResult> projectBlocksOnGPU(
+    const std::vector<Index3D>& block_indices, const Camera& camera,
+    const Transform& T_C_L, TsdfLayer* distance_layer_ptr) {
+  // Camera
+  Camera* camera_device_ptr;
+  checkCudaErrors(cudaMalloc(&camera_device_ptr, sizeof(Camera)));
+  checkCudaErrors(cudaMemcpy(camera_device_ptr, &camera, sizeof(Camera),
+                             cudaMemcpyHostToDevice));
+
+  // Transformation
+  // NOTE(alexmillane): For some reason I only got things to work by
+  // separating the Eigen::Affine3f into the rotation matrix and translation
+  // vector... I cannot explain why it didn't work, but I spent hours trying
+  // to get it working and couldn't.
+  const Eigen::Matrix3f R_C_L = T_C_L.rotation();
+  const Eigen::Vector3f t_C_L = T_C_L.translation();
+  Eigen::Matrix3f* R_C_L_device_ptr;
+  Eigen::Vector3f* t_C_L_device_ptr;
+  checkCudaErrors(cudaMalloc(&R_C_L_device_ptr, sizeof(Eigen::Matrix3f)));
+  checkCudaErrors(cudaMemcpy(R_C_L_device_ptr, R_C_L.data(),
+                             sizeof(Eigen::Matrix3f), cudaMemcpyHostToDevice));
+  checkCudaErrors(cudaMalloc(&t_C_L_device_ptr, sizeof(Eigen::Vector3f)));
+  checkCudaErrors(cudaMemcpy(t_C_L_device_ptr, t_C_L.data(),
+                             sizeof(Eigen::Vector3f), cudaMemcpyHostToDevice));
+
+  // Copy the block indices to the GPU for projection
+  Index3D* block_indices_device_ptr;
+  checkCudaErrors(cudaMalloc(&block_indices_device_ptr,
+                             block_indices.size() * sizeof(Index3D)));
+  checkCudaErrors(cudaMemcpy(block_indices_device_ptr, block_indices.data(),
+                             block_indices.size() * sizeof(Index3D),
+                             cudaMemcpyHostToDevice));
+
+  // Output space
+  BlockProjectionResult* block_projection_results_device_ptr;
+  checkCudaErrors(
+      cudaMalloc(&block_projection_results_device_ptr,
+                 block_indices.size() * sizeof(BlockProjectionResult)));
+
+  // TODO: CURRENTLY ASSUMES WE CAN LAUNCH AN UNLIMITED NUMBER OF THREAD BLOCKS
+  constexpr int kVoxelsPerSide = VoxelBlock<TsdfVoxel>::kVoxelsPerSide;
+  const dim3 kThreadsPerBlock(kVoxelsPerSide, kVoxelsPerSide, kVoxelsPerSide);
+  const int num_blocks = block_indices.size();
+
+  projectBlocksToCamera<<<num_blocks, kThreadsPerBlock>>>(
+      block_indices_device_ptr, camera_device_ptr, R_C_L_device_ptr,
+      t_C_L_device_ptr, distance_layer_ptr->block_size(),
+      block_projection_results_device_ptr);
+  checkCudaErrors(cudaGetLastError());
+  checkCudaErrors(cudaDeviceSynchronize());
+
+  // Copy over results
+  std::vector<BlockProjectionResult> projection_results;
+  projection_results.resize(block_indices.size());
+  checkCudaErrors(
+      cudaMemcpy(projection_results.data(), block_projection_results_device_ptr,
+                 block_indices.size() * sizeof(BlockProjectionResult),
+                 cudaMemcpyDeviceToHost));
+
+  // Free
+  cudaFree(R_C_L_device_ptr);
+  cudaFree(t_C_L_device_ptr);
+  cudaFree(block_indices_device_ptr);
+  cudaFree(block_projection_results_device_ptr);
+
+  return projection_results;
+}
+
+Eigen::Matrix3Xf transformPointsOnGPU(const Transform& T_B_A,
+                                      const Eigen::Matrix3Xf& vecs_A) {
+  // Move inputs
+  float* vecs_A_device_ptr;
+  const int num_elements = vecs_A.rows() * vecs_A.cols();
+  checkCudaErrors(cudaMalloc(&vecs_A_device_ptr, num_elements * sizeof(float)));
+  checkCudaErrors(cudaMemcpy(vecs_A_device_ptr, vecs_A.data(),
+                             num_elements * sizeof(float),
+                             cudaMemcpyHostToDevice));
+  // Transformation
+  // NOTE(alexmillane): For some reason I only got things to work by
+  // separating the Eigen::Affine3f into the rotation matrix and translation
+  // vector... I cannot explain why it didn't work, but I spent hours trying
+  // to get it working and couldn't.
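+  // (A possible explanation, not verified here: Eigen::Affine3f stores a
+  // full 4x4 matrix with alignment requirements, so raw-copying it to the
+  // device is more fragile than the plain Matrix3f/Vector3f pair used below.
+  // This is an assumption, not a confirmed root cause.)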
+  const Eigen::Matrix3f R_B_A = T_B_A.rotation();
+  const Eigen::Vector3f t_B_A = T_B_A.translation();
+  Eigen::Matrix3f* R_B_A_device_ptr;
+  Eigen::Vector3f* t_B_A_device_ptr;
+  checkCudaErrors(cudaMalloc(&R_B_A_device_ptr, sizeof(Eigen::Matrix3f)));
+  checkCudaErrors(cudaMemcpy(R_B_A_device_ptr, R_B_A.data(),
+                             sizeof(Eigen::Matrix3f), cudaMemcpyHostToDevice));
+  checkCudaErrors(cudaMalloc(&t_B_A_device_ptr, sizeof(Eigen::Vector3f)));
+  checkCudaErrors(cudaMemcpy(t_B_A_device_ptr, t_B_A.data(),
+                             sizeof(Eigen::Vector3f), cudaMemcpyHostToDevice));
+
+  // Output space
+  float* vecs_B_device;
+  checkCudaErrors(cudaMalloc(&vecs_B_device, num_elements * sizeof(float)));
+
+  // Kernel
+  const int num_vecs = vecs_A.cols();
+  constexpr int threadsPerBlock = 512;
+  const int blocksInGrid = (num_vecs / threadsPerBlock) + 1;
+  transformPointsOnGPU<<<blocksInGrid, threadsPerBlock>>>(
+      R_B_A_device_ptr, t_B_A_device_ptr, vecs_A_device_ptr, num_vecs,
+      vecs_B_device);
+  checkCudaErrors(cudaGetLastError());
+  checkCudaErrors(cudaDeviceSynchronize());
+
+  // Retrieve output
+  Eigen::Matrix3Xf vecs_B(3, num_vecs);
+  checkCudaErrors(cudaMemcpy(vecs_B.data(), vecs_B_device,
+                             num_elements * sizeof(float),
+                             cudaMemcpyDeviceToHost));
+
+  cudaFree(vecs_A_device_ptr);
+  cudaFree(R_B_A_device_ptr);
+  cudaFree(t_B_A_device_ptr);
+  cudaFree(vecs_B_device);
+
+  return vecs_B;
+}
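+
+// Usage sketch (illustrative values only):
+//
+//   Eigen::Matrix3Xf points_A(3, 2);
+//   points_A << 1.0f, 0.0f,
+//               0.0f, 1.0f,
+//               0.0f, 0.0f;
+//   Transform T_B_A = Transform::Identity();
+//   T_B_A.translation() << 0.0f, 0.0f, 1.0f;
+//   const Eigen::Matrix3Xf points_B =
+//       test_utils::transformPointsOnGPU(T_B_A, points_A);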
+
+}  // namespace test_utils
+}  // namespace nvblox
diff --git a/nvblox/tests/lib/cuda/test_utils_cuda.cu b/nvblox/tests/lib/cuda/test_utils_cuda.cu
new file mode 100644
index 00000000..a9527c3d
--- /dev/null
+++ b/nvblox/tests/lib/cuda/test_utils_cuda.cu
@@ -0,0 +1,164 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "nvblox/tests/test_utils_cuda.h"
+
+namespace nvblox {
+namespace test_utils {
+
+/* Fills a device vector with a constant value.
+ * Call with
+ * - GridDim: 1 block, 1D
+ * - BlockDim: 1D, size > num_elements (1 thread per element)
+ */
+template <typename T>
+__global__ void fillVectorWithConstant(int num_elements, T value, T* vec) {
+  if ((blockIdx.x == 0) && (threadIdx.x < num_elements)) {
+    vec[threadIdx.x] = value;
+  }
+}
+
+/* Checks that all elements of a device vector equal a constant value. The
+ * flag must be initialized to true before launch; threads that find a
+ * mismatch set it to false. (Initializing the flag inside the kernel would
+ * race across thread blocks: __syncthreads() only synchronizes within a
+ * single block.)
+ * Call with
+ * - GridDim: 1D
+ * - BlockDim: 1D (1 thread per element)
+ */
+template <typename T>
+__global__ void checkVectorAllConstant(int num_elements, T value, const T* vec,
+                                       bool* flag) {
+  const int thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (thread_idx < num_elements) {
+    if (vec[thread_idx] != value) {
+      *flag = false;
+    }
+  }
+}
+
+/* Adds one to every element of a device vector.
+ * Call with
+ * - GridDim: N blocks, 1D
+ * - BlockDim: 1D (1 thread per element)
+ */
+__global__ void addOneToAll(int num_elements, int* vec) {
+  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx < num_elements) {
+    vec[idx] = vec[idx] + 1;
+  }
+}
+
+void fillVectorWithConstant(float value, unified_vector<float>* vec_ptr) {
+  constexpr int kMaxThreadBlockSize = 512;
+  CHECK_LT(vec_ptr->size(), kMaxThreadBlockSize);
+  // kernel
+  int num_thread_blocks = 1;
+  int num_threads = vec_ptr->size();
+  fillVectorWithConstant<<<num_thread_blocks, num_threads>>>(
+      vec_ptr->size(), value, vec_ptr->data());
+  checkCudaErrors(cudaDeviceSynchronize());
+  checkCudaErrors(cudaPeekAtLastError());
+}
+
+void fillVectorWithConstant(int value, unified_vector<int>* vec_ptr) {
+  constexpr int kMaxThreadBlockSize = 512;
+  CHECK_LT(vec_ptr->size(), kMaxThreadBlockSize);
+  // kernel
+  int num_thread_blocks = 1;
+  int num_threads = vec_ptr->size();
+  fillVectorWithConstant<<<num_thread_blocks, num_threads>>>(
+      vec_ptr->size(), value, vec_ptr->data());
+  checkCudaErrors(cudaDeviceSynchronize());
+  checkCudaErrors(cudaPeekAtLastError());
+}
+
+void fillWithConstant(float value, size_t num_elems, float* vec_ptr) {
+  constexpr int kMaxThreadBlockSize = 512;
+  CHECK_LT(num_elems, kMaxThreadBlockSize);
+  // kernel
+  int num_thread_blocks = 1;
+  int num_threads = num_elems;
+  fillVectorWithConstant<<<num_thread_blocks, num_threads>>>(
+      num_elems, value, vec_ptr);
+  checkCudaErrors(cudaDeviceSynchronize());
+  checkCudaErrors(cudaPeekAtLastError());
+}
+
+void fillWithConstant(int value, size_t num_elems, int* vec_ptr) {
+  constexpr int kMaxThreadBlockSize = 512;
+  CHECK_LT(num_elems, kMaxThreadBlockSize);
+  // kernel
+  int num_thread_blocks = 1;
+  int num_threads = num_elems;
+  fillVectorWithConstant<<<num_thread_blocks, num_threads>>>(
+      num_elems, value, vec_ptr);
+  checkCudaErrors(cudaDeviceSynchronize());
+  checkCudaErrors(cudaPeekAtLastError());
+}
+
+template <typename T>
+bool checkVectorAllConstantTemplate(const T* vec, T value, size_t size) {
+  // Allocate memory for the flag and initialize it to true on the host side
+  // (see the kernel comment above for why this is not done in the kernel).
+  bool* flag_device_ptr;
+  checkCudaErrors(cudaMalloc(&flag_device_ptr, sizeof(bool)));
+  const bool kInitialFlag = true;
+  checkCudaErrors(cudaMemcpy(flag_device_ptr, &kInitialFlag, sizeof(bool),
+                             cudaMemcpyHostToDevice));
+
+  // Kernel
+  constexpr int kNumThreads = 512;
+  const int num_thread_blocks = size / kNumThreads + 1;
+  checkVectorAllConstant<<<num_thread_blocks, kNumThreads>>>(
+      size, value, vec, flag_device_ptr);
+  checkCudaErrors(cudaDeviceSynchronize());
+  checkCudaErrors(cudaPeekAtLastError());
+
+  // Copy the flag back
+  bool flag;
+  checkCudaErrors(
+      cudaMemcpy(&flag, flag_device_ptr, sizeof(bool), cudaMemcpyDeviceToHost));
+
+  // Free the flag
+  checkCudaErrors(cudaFree(flag_device_ptr));
+
+  return flag;
+}
+
+bool checkVectorAllConstant(const unified_vector<float>& vec, float value) {
+  return checkVectorAllConstantTemplate(vec.data(), value, vec.size());
+}
+
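+// Usage sketch for the fill/check helpers (illustrative; assumes
+// unified_vector has a sizing constructor as used elsewhere in the tests):
+//
+//   unified_vector<float> vec(64);
+//   fillVectorWithConstant(1.0f, &vec);
+//   CHECK(checkVectorAllConstant(vec, 1.0f));
+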
+bool checkVectorAllConstant(const unified_vector<int>& vec, int value) {
+  return checkVectorAllConstantTemplate(vec.data(), value, vec.size());
+}
+
+bool checkAllConstant(const float* vec_ptr, float value, size_t num_elems) {
+  return checkVectorAllConstantTemplate(vec_ptr, value, num_elems);
+}
+
+bool checkAllConstant(const int* vec_ptr, int value, size_t num_elems) {
+  return checkVectorAllConstantTemplate(vec_ptr, value, num_elems);
+}
+
+void addOneToAllGPU(unified_vector<int>* vec_ptr) {
+  constexpr int kNumThreadsPerBlock = 512;
+  const int kNumBlocks = vec_ptr->size() / kNumThreadsPerBlock + 1;
+  addOneToAll<<<kNumBlocks, kNumThreadsPerBlock>>>(vec_ptr->size(),
+                                                   vec_ptr->data());
+  checkCudaErrors(cudaDeviceSynchronize());
+  checkCudaErrors(cudaPeekAtLastError());
+}
+
+}  // namespace test_utils
+}  // namespace nvblox
diff --git a/nvblox/tests/lib/projective_tsdf_integrator_cpu.cpp b/nvblox/tests/lib/projective_tsdf_integrator_cpu.cpp
new file mode 100644
index 00000000..3528e306
--- /dev/null
+++ b/nvblox/tests/lib/projective_tsdf_integrator_cpu.cpp
@@ -0,0 +1,87 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "nvblox/tests/projective_tsdf_integrator_cpu.h"
+
+namespace nvblox {
+
+void ProjectiveTsdfIntegratorCPU::updateBlocks(
+    const std::vector<Index3D>& block_indices, const DepthImage& depth_frame,
+    const Transform& T_L_C, const Camera& camera,
+    const float truncation_distance_m, TsdfLayer* layer) {
+  CHECK(layer->memory_type() == MemoryType::kUnified)
+      << "For CPU-based interpolation, the layer must be CPU accessible "
+         "(i.e. MemoryType::kUnified).";
+
+  // Update each block requested using the depth map.
+  Transform T_C_L = T_L_C.inverse();
+  for (const Index3D& block_index : block_indices) {
+    VoxelBlock<TsdfVoxel>::Ptr block_ptr = layer->getBlockAtIndex(block_index);
+
+    Index3D voxel_index;
+    for (voxel_index.x() = 0; voxel_index.x() < TsdfBlock::kVoxelsPerSide;
+         voxel_index.x()++) {
+      for (voxel_index.y() = 0; voxel_index.y() < TsdfBlock::kVoxelsPerSide;
+           voxel_index.y()++) {
+        for (voxel_index.z() = 0; voxel_index.z() < TsdfBlock::kVoxelsPerSide;
+             voxel_index.z()++) {
+          Vector3f p_L = getCenterPostionFromBlockIndexAndVoxelIndex(
+              layer->block_size(), block_index, voxel_index);
+
+          // Convert the p_L to a p_C
+          Vector3f p_C = T_C_L * p_L;
+          Vector2f u_C;
+          if (!camera.project(p_C, &u_C)) {
+            // If the voxel isn't in the frame, move on to the next voxel.
+            continue;
+          }
+
+          float depth = -1.0;
+          if (!interpolation::interpolate2DLinear(depth_frame, u_C, &depth)) {
+            continue;
+          }
+
+          DCHECK_GE(depth, 0.0f);
+
+          TsdfVoxel& voxel =
+              block_ptr
+                  ->voxels[voxel_index.x()][voxel_index.y()][voxel_index.z()];
+
+          // TODO: double-check this distance.
+          const float voxel_distance = depth - p_C.z();
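+
+          // (Sign convention: depth is the measured surface depth along this
+          // pixel's ray and p_C.z() is the voxel's depth. Eg depth = 2.0m and
+          // p_C.z() = 1.5m gives +0.5m, a voxel in front of the surface,
+          // while p_C.z() = 2.5m gives -0.5m, a voxel behind it.)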
+
+          // If we're behind the negative truncation distance, just continue.
+          if (voxel_distance < -truncation_distance_m) {
+            continue;
+          }
+
+          const float measurement_weight = 1.0f;
+          const float fused_distance = (voxel_distance * measurement_weight +
+                                        voxel.distance * voxel.weight) /
+                                       (measurement_weight + voxel.weight);
+
+          voxel.distance =
+              fused_distance > 0.0f
+                  ? std::min(truncation_distance_m, fused_distance)
+                  : std::max(-truncation_distance_m, fused_distance);
+          voxel.weight =
+              std::min(measurement_weight + voxel.weight, max_weight_);
+        }
+      }
+    }
+  }
+}
+
+}  // namespace nvblox
diff --git a/nvblox/tests/lib/utils.cpp b/nvblox/tests/lib/utils.cpp
new file mode 100644
index 00000000..9e0b312e
--- /dev/null
+++ b/nvblox/tests/lib/utils.cpp
@@ -0,0 +1,70 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "nvblox/tests/utils.h"
+
+#include <cstdlib>
+
+#include "nvblox/core/accessors.h"
+#include "nvblox/io/ply_writer.h"
+#include "nvblox/utils/timing.h"
+
+namespace nvblox {
+namespace test_utils {
+
+float randomFloatInRange(float f_min, float f_max) {
+  float f = static_cast<float>(std::rand()) / RAND_MAX;
+  return f_min + f * (f_max - f_min);
+}
+
+float randomIntInRange(int i_min, int i_max) {
+  return static_cast<int>(randomFloatInRange(i_min, i_max + 1));
+}
+
+float randomSign() {
+  constexpr int kHalfMaxRandMax = RAND_MAX / 2;
+  return (std::rand() < kHalfMaxRandMax) ? -1.0f : 1.0f;
+}
+
+Index3D getRandomIndex3dInRange(const int min, const int max) {
+  return Index3D(test_utils::randomIntInRange(min, max),
+                 test_utils::randomIntInRange(min, max),
+                 test_utils::randomIntInRange(min, max));
+}
+
+Vector3f getRandomVector3fInRange(const float min, const float max) {
+  return Vector3f(test_utils::randomFloatInRange(min, max),
+                  test_utils::randomFloatInRange(min, max),
+                  test_utils::randomFloatInRange(min, max));
+}
+
+Vector3f getRandomVector3fInRange(const Vector3f& min, const Vector3f& max) {
+  return Vector3f(test_utils::randomFloatInRange(min.x(), max.x()),
+                  test_utils::randomFloatInRange(min.y(), max.y()),
+                  test_utils::randomFloatInRange(min.z(), max.z()));
+}
+
+Vector3f getRandomUnitVector3f() {
+  return getRandomVector3fInRange(-1.0, 1.0).normalized();
+}
+
+Color randomColor() {
+  return Color(static_cast<uint8_t>(randomIntInRange(0, 255)),
+               static_cast<uint8_t>(randomIntInRange(0, 255)),
+               static_cast<uint8_t>(randomIntInRange(0, 255)));
+}
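+
+// Usage sketch (illustrative; the tests seed std::srand explicitly to stay
+// deterministic):
+//
+//   std::srand(0);
+//   const float f = test_utils::randomFloatInRange(-1.0f, 1.0f);
+//   const Index3D idx = test_utils::getRandomIndex3dInRange(-10, 10);
+//   const Vector3f v = test_utils::getRandomUnitVector3f();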
+
+}  // namespace test_utils
+}  // namespace nvblox
diff --git a/nvblox/tests/test_3d_interpolation.cpp b/nvblox/tests/test_3d_interpolation.cpp
new file mode 100644
index 00000000..f9f30592
--- /dev/null
+++ b/nvblox/tests/test_3d_interpolation.cpp
@@ -0,0 +1,292 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+
+#include "nvblox/core/interpolation_3d.h"
+#include "nvblox/core/layer.h"
+#include "nvblox/primitives/primitives.h"
+#include "nvblox/primitives/scene.h"
+
+#include "nvblox/tests/utils.h"
+
+using namespace nvblox;
+
+constexpr float kFloatEpsilon = 1e-6;
+
+constexpr int kXAxisIndex = 0;
+constexpr int kYAxisIndex = 1;
+constexpr int kZAxisIndex = 2;
+
+template <typename VoxelType>
+using SetVoxelFunctionType = std::function<void(VoxelType&, float)>;
+
+template <typename VoxelType>
+void fillVoxelsWithIndices(VoxelBlock<VoxelType>* block_ptr,
+                           const int xyz_index,
+                           SetVoxelFunctionType<VoxelType> set_voxel,
+                           const int offset = 0) {
+  CHECK_GE(xyz_index, 0);
+  CHECK_LT(xyz_index, 3);
+  Index3D index;
+  for (index.x() = 0; index.x() < TsdfBlock::kVoxelsPerSide; index.x()++) {
+    for (index.y() = 0; index.y() < TsdfBlock::kVoxelsPerSide; index.y()++) {
+      for (index.z() = 0; index.z() < TsdfBlock::kVoxelsPerSide; index.z()++) {
+        set_voxel(block_ptr->voxels[index.x()][index.y()][index.z()],
+                  static_cast<float>(index[xyz_index] + offset));
+      }
+    }
+  }
+}
+
+void fillVoxelsWithIndices(TsdfBlock* block_ptr, const int xyz_index,
+                           const int offset = 0) {
+  auto setVoxelLambda = [](TsdfVoxel& voxel, float value) {
+    voxel.distance = value;
+    voxel.weight = 1.0f;
+  };
+  fillVoxelsWithIndices<TsdfVoxel>(block_ptr, xyz_index, setVoxelLambda,
+                                   offset);
+}
+
+void fillVoxelsWithIndices(EsdfBlock* block_ptr, const int xyz_index,
+                           const int offset = 0) {
+  auto setVoxelLambda = [](EsdfVoxel& voxel, float value) {
+    voxel.squared_distance_vox = std::pow(value, 2);
+    voxel.observed = true;
+  };
+  fillVoxelsWithIndices<EsdfVoxel>(block_ptr, xyz_index, setVoxelLambda,
+                                   offset);
+}
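+
+// (Eg with xyz_index == kYAxisIndex and offset == 0, the TSDF overload above
+// writes distance 2.0f and weight 1.0f into every voxel whose y-index is 2.)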
+
+TEST(InterpolatorTest, NeighboursTest) {
+  // Empty layer
+  constexpr float kVoxelSize = 1.0f;
+  TsdfLayer layer(kVoxelSize, MemoryType::kUnified);
+
+  // Allocate a block with its origin at the origin
+  TsdfBlock::Ptr block_ptr = layer.allocateBlockAtIndex(Index3D(0, 0, 0));
+
+  // Fill the block such that the distances are equal to the x-index.
+  fillVoxelsWithIndices(block_ptr.get(), kXAxisIndex);
+
+  // Dummy function which always says a voxel is valid
+  auto valid_lambda = [](const TsdfVoxel&) -> bool { return true; };
+
+  // Check the surrounding voxels
+  std::array<TsdfVoxel, 8> voxels;
+  Vector3f p_L = 0.0 * Vector3f::Ones();
+  EXPECT_FALSE(interpolation::internal::getSurroundingVoxels3D(
+      p_L, layer, valid_lambda, &voxels));
+  p_L = 0.1 * Vector3f::Ones();
+  EXPECT_FALSE(interpolation::internal::getSurroundingVoxels3D(
+      p_L, layer, valid_lambda, &voxels));
+  p_L = 0.6 * Vector3f::Ones();
+  EXPECT_TRUE(interpolation::internal::getSurroundingVoxels3D(
+      p_L, layer, valid_lambda, &voxels));
+  // Check that the surrounding voxels have x values {0,0,0,0,1,1,1,1}
+  for (int i = 0; i < 8; i++) {
+    if (i < 4) {
+      EXPECT_EQ(voxels[i].distance, 0);
+    } else {
+      EXPECT_EQ(voxels[i].distance, 1);
+    }
+  }
+  // Ask for neighbours outside the block bounds on the x-dimension
+  p_L = Vector3f(static_cast<float>(TsdfBlock::kVoxelsPerSide), 0.6f, 0.6f);
+  EXPECT_FALSE(interpolation::internal::getSurroundingVoxels3D(
+      p_L, layer, valid_lambda, &voxels));
+  // Add a new block
+  block_ptr = layer.allocateBlockAtIndex(Index3D(1, 0, 0));
+  fillVoxelsWithIndices(block_ptr.get(), kXAxisIndex);
+  // Now the test just outside the first block on the x-dimension should pass.
+  EXPECT_TRUE(interpolation::internal::getSurroundingVoxels3D(
+      p_L, layer, valid_lambda, &voxels));
+  // Check that the surrounding voxels have x values {7,7,7,7,0,0,0,0}
+  for (int i = 0; i < 8; i++) {
+    if (i < 4) {
+      EXPECT_EQ(voxels[i].distance, 7);
+    } else {
+      EXPECT_EQ(voxels[i].distance, 0);
+    }
+  }
+}
+
+TEST(InterpolatorTest, OffsetTest) {
+  // Empty layer
+  constexpr float kVoxelSize = 1.0f;
+  TsdfLayer layer(kVoxelSize, MemoryType::kUnified);
+
+  // Allocate a block with its origin at the origin
+  TsdfBlock::Ptr block_ptr = layer.allocateBlockAtIndex(Index3D(0, 0, 0));
+
+  // Dummy function which always says a voxel is valid
+  auto valid_lambda = [](const TsdfVoxel&) -> bool { return true; };
+
+  // Get the surrounding voxels (in this test we just look at the offset
+  // vector)
+  std::array<TsdfVoxel, 8> voxels;
+  Vector3f p_offset_L;
+  Vector3f p_L = 0.5 * Vector3f::Ones();
+  EXPECT_TRUE(interpolation::internal::getSurroundingVoxels3D(
+      p_L, layer, valid_lambda, &voxels, &p_offset_L));
+
+  // TODO(alexmillane): Let's make some eigen checks....
+  EXPECT_NEAR(p_offset_L.x(), 0.0f, kFloatEpsilon);
+  EXPECT_NEAR(p_offset_L.y(), 0.0f, kFloatEpsilon);
+  EXPECT_NEAR(p_offset_L.z(), 0.0f, kFloatEpsilon);
+
+  p_L = 1.0 * Vector3f::Ones();
+  EXPECT_TRUE(interpolation::internal::getSurroundingVoxels3D(
+      p_L, layer, valid_lambda, &voxels, &p_offset_L));
+  EXPECT_NEAR(p_offset_L.x(), 0.5f, kFloatEpsilon);
+  EXPECT_NEAR(p_offset_L.y(), 0.5f, kFloatEpsilon);
+  EXPECT_NEAR(p_offset_L.z(), 0.5f, kFloatEpsilon);
+}
+
+TEST(InterpolatorTest, InterpolationTest) {
+  // Make sure this is deterministic.
+  std::srand(0);
+
+  // Create 3 layers where the distances are equal to the x, y, z indices
+  // respectively. The idea is that interpolating in these layers at point p_L
+  // gives you p_L back for all points within the block (minus half a voxel
+  // width, because indexing starts at the low-corner but interpolation is wrt
+  // the voxel center :)).
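+  // (Eg in the x-layer with kVoxelSize = 1.0, querying p_L = (2.3, y, z)
+  // should return 2.3 - 0.5 = 1.8; the EXPECT_NEAR checks below assert
+  // exactly this half-voxel shift.)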
+  constexpr float kVoxelSize = 1.0f;
+  constexpr float kTestBlockSize = TsdfBlock::kVoxelsPerSide * kVoxelSize;
+  // TSDF
+  TsdfLayer layer_x_tsdf(kVoxelSize, MemoryType::kUnified);
+  TsdfLayer layer_y_tsdf(kVoxelSize, MemoryType::kUnified);
+  TsdfLayer layer_z_tsdf(kVoxelSize, MemoryType::kUnified);
+  TsdfBlock::Ptr tsdf_block_x_ptr =
+      layer_x_tsdf.allocateBlockAtIndex(Index3D(0, 0, 0));
+  TsdfBlock::Ptr tsdf_block_y_ptr =
+      layer_y_tsdf.allocateBlockAtIndex(Index3D(0, 0, 0));
+  TsdfBlock::Ptr tsdf_block_z_ptr =
+      layer_z_tsdf.allocateBlockAtIndex(Index3D(0, 0, 0));
+  fillVoxelsWithIndices(tsdf_block_x_ptr.get(), kXAxisIndex);
+  fillVoxelsWithIndices(tsdf_block_y_ptr.get(), kYAxisIndex);
+  fillVoxelsWithIndices(tsdf_block_z_ptr.get(), kZAxisIndex);
+  // ESDF
+  EsdfLayer layer_x_esdf(kVoxelSize, MemoryType::kUnified);
+  EsdfLayer layer_y_esdf(kVoxelSize, MemoryType::kUnified);
+  EsdfLayer layer_z_esdf(kVoxelSize, MemoryType::kUnified);
+  EsdfBlock::Ptr esdf_block_x_ptr =
+      layer_x_esdf.allocateBlockAtIndex(Index3D(0, 0, 0));
+  EsdfBlock::Ptr esdf_block_y_ptr =
+      layer_y_esdf.allocateBlockAtIndex(Index3D(0, 0, 0));
+  EsdfBlock::Ptr esdf_block_z_ptr =
+      layer_z_esdf.allocateBlockAtIndex(Index3D(0, 0, 0));
+  fillVoxelsWithIndices(esdf_block_x_ptr.get(), kXAxisIndex);
+  fillVoxelsWithIndices(esdf_block_y_ptr.get(), kYAxisIndex);
+  fillVoxelsWithIndices(esdf_block_z_ptr.get(), kZAxisIndex);
+
+  // Testing random points inside the block
+  constexpr int kNumTests = 1000;
+  for (int i = 0; i < kNumTests; i++) {
+    // Random point
+    const float half_voxel_size = kVoxelSize / 2.0f;
+    const Vector3f p_L = test_utils::getRandomVector3fInRange(
+        half_voxel_size, kTestBlockSize - half_voxel_size);
+
+    // Interpolate
+    float interpolated_distance_x;
+    float interpolated_distance_y;
+    float interpolated_distance_z;
+    // TSDF
+    EXPECT_TRUE(interpolation::interpolateOnCPU(p_L, layer_x_tsdf,
+                                                &interpolated_distance_x));
+    EXPECT_TRUE(interpolation::interpolateOnCPU(p_L, layer_y_tsdf,
+                                                &interpolated_distance_y));
+    EXPECT_TRUE(interpolation::interpolateOnCPU(p_L, layer_z_tsdf,
+                                                &interpolated_distance_z));
+    // Check
+    EXPECT_NEAR(interpolated_distance_x, p_L.x() - 0.5f, kFloatEpsilon);
+    EXPECT_NEAR(interpolated_distance_y, p_L.y() - 0.5f, kFloatEpsilon);
+    EXPECT_NEAR(interpolated_distance_z, p_L.z() - 0.5f, kFloatEpsilon);
+    // ESDF
+    EXPECT_TRUE(interpolation::interpolateOnCPU(p_L, layer_x_esdf,
+                                                &interpolated_distance_x));
+    EXPECT_TRUE(interpolation::interpolateOnCPU(p_L, layer_y_esdf,
+                                                &interpolated_distance_y));
+    EXPECT_TRUE(interpolation::interpolateOnCPU(p_L, layer_z_esdf,
+                                                &interpolated_distance_z));
+    // Check
+    EXPECT_NEAR(interpolated_distance_x, p_L.x() - 0.5f, kFloatEpsilon);
+    EXPECT_NEAR(interpolated_distance_y, p_L.y() - 0.5f, kFloatEpsilon);
+    EXPECT_NEAR(interpolated_distance_z, p_L.z() - 0.5f, kFloatEpsilon);
+  }
+}
+
+TEST(InterpolatorTest, PrimitivesInterpolationTest) {
+  // Maximum distance to consider for scene generation.
+  constexpr float kMaxDist = 10.0;
+  constexpr float kMinWeight = 1.0;
+
+  // Scene is bounded to -5, -5, 0 to 5, 5, 5.
+  primitives::Scene scene;
+  scene.aabb() = AxisAlignedBoundingBox(Vector3f(-5.0f, -5.0f, 0.0f),
+                                        Vector3f(5.0f, 5.0f, 5.0f));
+  // Create a scene with a ground plane and a sphere.
+  scene.addGroundLevel(0.0f);
+  scene.addCeiling(5.0f);
+  scene.addPrimitive(
+      std::make_unique<primitives::Sphere>(Vector3f(0.0f, 0.0f, 2.0f), 2.0f));
+  // Add bounding planes at 5 meters. Basically makes it a sphere in a box.
+  scene.addPlaneBoundaries(-5.0f, 5.0f, -5.0f, 5.0f);
+
+  // Get the ground truth SDF for it.
+  constexpr float kVoxelSize_m = 0.2;
+  TsdfLayer gt_layer(kVoxelSize_m, MemoryType::kUnified);
+  constexpr float kTruncationDistanceMeters = 10;
+  scene.generateSdfFromScene(kTruncationDistanceMeters, &gt_layer);
+
+  // Generate some random points in the arena
+  constexpr int kNumPointsToTest = 1000;
+  std::vector<Vector3f> p_L_vec(kNumPointsToTest);
+  std::generate(p_L_vec.begin(), p_L_vec.end(), []() {
+    return Vector3f(test_utils::randomFloatInRange(-5.0f + kVoxelSize_m,
+                                                   5.0f - kVoxelSize_m),
+                    test_utils::randomFloatInRange(-5.0f + kVoxelSize_m,
+                                                   5.0f - kVoxelSize_m),
+                    test_utils::randomFloatInRange(0.0f + kVoxelSize_m,
+                                                   5.0f - kVoxelSize_m));
+  });
+
+  // Interpolate the distance field at these locations
+  std::vector<float> interpolated_distances;
+  std::vector<bool> success_flags;
+  interpolation::interpolateOnCPU(p_L_vec, gt_layer, &interpolated_distances,
+                                  &success_flags);
+
+  // Get the true distances
+  std::vector<float> gt_distances;
+  for (const Vector3f& p_L : p_L_vec) {
+    gt_distances.push_back(scene.getSignedDistanceToPoint(p_L, kMaxDist));
+  }
+
+  // Check
+  for (size_t i = 0; i < p_L_vec.size(); i++) {
+    EXPECT_NEAR(interpolated_distances[i], gt_distances[i], 0.5 * kVoxelSize_m);
+  }
+}
+
+int main(int argc, char** argv) {
+  FLAGS_alsologtostderr = true;
+  google::InitGoogleLogging(argv[0]);
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/nvblox/tests/test_3dmatch.cpp b/nvblox/tests/test_3dmatch.cpp
new file mode 100644
index 00000000..dd7780d6
--- /dev/null
+++ b/nvblox/tests/test_3dmatch.cpp
@@ -0,0 +1,133 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+
+#include "nvblox/core/image.h"
+#include "nvblox/datasets/image_loader.h"
+#include "nvblox/datasets/parse_3dmatch.h"
+
+// DEBUG
+#include <fstream>
+#include <iostream>
+
+using namespace nvblox;
+
+constexpr float kTolerance = 1e-4;
+
+class Dataset3DMatchTest : public ::testing::Test {
+ protected:
+  void SetUp() override { base_path_ = "./data/3dmatch/"; }
+
+  std::string base_path_;
+};
+
+TEST_F(Dataset3DMatchTest, ParseTransform) {
+  const std::string transform_filename =
+      datasets::threedmatch::getPathForFramePose(base_path_, 1, 0);
+
+  Transform T_L_C_test;
+
+  ASSERT_TRUE(datasets::threedmatch::parsePoseFromFile(transform_filename,
+                                                       &T_L_C_test));
+
+  Eigen::Matrix4f T_L_C_mat;
+  T_L_C_mat << 3.13181000e-01, 3.09473000e-01, -8.97856000e-01, 1.97304600e+00,
+      -8.73910000e-02, -9.32015000e-01, -3.51729000e-01, 1.12573400e+00,
+      -9.45665000e-01, 1.88620000e-01, -2.64844000e-01, 3.09820000e-01,
+      0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00;
+
+  Transform T_L_C_true(T_L_C_mat);
+
+  EXPECT_TRUE(T_L_C_test.isApprox(T_L_C_true));
+}
+
+TEST_F(Dataset3DMatchTest, ParseCameraIntrinsics) {
+  const std::string intrinsics_filename =
+      datasets::threedmatch::getPathForCameraIntrinsics(base_path_);
+
+  Eigen::Matrix3f intrinsics_test;
+
+  ASSERT_TRUE(datasets::threedmatch::parseCameraFromFile(intrinsics_filename,
+                                                         &intrinsics_test));
+
+  Eigen::Matrix3f intrinsics_true;
+  intrinsics_true << 5.70342205e+02, 0.00000000e+00, 3.20000000e+02,
+      0.00000000e+00, 5.70342205e+02, 2.40000000e+02, 0.00000000e+00,
+      0.00000000e+00, 1.00000000e+00;
+
+  EXPECT_TRUE(intrinsics_test.isApprox(intrinsics_true));
+}
+
+TEST_F(Dataset3DMatchTest, LoadImage) {
+  const std::string image_filename =
+      datasets::threedmatch::getPathForDepthImage(base_path_, 1, 0);
+
+  DepthImage image;
+  ASSERT_TRUE(datasets::load16BitDepthImage(image_filename, &image));
+
+  EXPECT_EQ(image.rows(), 480);
+  EXPECT_EQ(image.cols(), 640);
+
+  EXPECT_NEAR(image::minGPU(image), 0.0, kTolerance);
+  EXPECT_NEAR(image::maxGPU(image), 7.835, kTolerance);
+}
+
+enum class LoaderType { kSingleThreaded, kMultiThreaded };
+
+class LoaderParameterizedTest
+    : public Dataset3DMatchTest,
+      public ::testing::WithParamInterface<LoaderType> {
+  //
+};
+
+TEST_P(LoaderParameterizedTest, ImageLoaderObject) {
+  const LoaderType loader_type = GetParam();
+  std::unique_ptr<datasets::ImageLoader<DepthImage>> depth_loader_ptr;
+  if (loader_type == LoaderType::kSingleThreaded) {
+    depth_loader_ptr =
+        datasets::threedmatch::createDepthImageLoader(base_path_, 1);
+  } else {
+    constexpr int kNumThreads = 6;
+    depth_loader_ptr =
+        datasets::threedmatch::createMultithreadedDepthImageLoader(
+            base_path_, 1, kNumThreads);
+  }
+
+  DepthImage depth_image_1;
+  DepthImage depth_image_2;
+  DepthImage depth_image_3;
+  DepthImage depth_image_4;
+  EXPECT_TRUE(depth_loader_ptr->getNextImage(&depth_image_1));
+  EXPECT_TRUE(depth_loader_ptr->getNextImage(&depth_image_2));
+  EXPECT_TRUE(depth_loader_ptr->getNextImage(&depth_image_3));
+  EXPECT_FALSE(depth_loader_ptr->getNextImage(&depth_image_4));
+
+  EXPECT_EQ(depth_image_1.rows(), 480);
+  EXPECT_EQ(depth_image_1.cols(), 640);
+  EXPECT_NEAR(image::minGPU(depth_image_1), 0.0, kTolerance);
+  EXPECT_NEAR(image::maxGPU(depth_image_1), 7.835, kTolerance);
+}
+
+INSTANTIATE_TEST_CASE_P(LoaderTests, LoaderParameterizedTest,
+                        ::testing::Values(LoaderType::kSingleThreaded,
+                                          LoaderType::kMultiThreaded));
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/nvblox/tests/test_blox.cpp b/nvblox/tests/test_blox.cpp
new file mode 100644
index 00000000..c3cf8410
--- /dev/null
+++ b/nvblox/tests/test_blox.cpp
@@ -0,0 +1,96 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+
+#include "nvblox/core/accessors.h"
+#include "nvblox/core/blox.h"
+#include "nvblox/core/common_names.h"
+#include "nvblox/core/unified_ptr.h"
+
+#include "nvblox/tests/blox.h"
+#include "nvblox/tests/test_utils_cuda.h"
+
+using namespace nvblox;
+
+TEST(BloxTest, InitializeDistanceBloxOnGPU) {
+  TsdfBlock::Ptr block_ptr = TsdfBlock::allocate(MemoryType::kDevice);
+  TsdfVoxel zero_voxel;
+  zero_voxel.distance = 0.0f;
+  zero_voxel.weight = 0.0f;
+  EXPECT_TRUE(test_utils::checkBlockAllConstant(block_ptr, zero_voxel));
+  TsdfVoxel one_voxel;
+  one_voxel.distance = 1.0f;
+  one_voxel.weight = 1.0f;
+  EXPECT_FALSE(test_utils::checkBlockAllConstant(block_ptr, one_voxel));
+}
+
+TEST(BloxTestDeathTest, NoAllocationDefined) {
+  // This block type has not defined an allocation function
+  EXPECT_FALSE(traits::has_allocate::value);
+}
+
+TEST(BloxTest, CustomGPUInitialization) {
+  constexpr float block_size_m = 0.1;
+  BlockLayer<VoxelBlock<TestVoxel>> layer(block_size_m, MemoryType::kDevice);
+  auto block_ptr = layer.allocateBlockAtIndex(Index3D(0, 0, 0));
+  TestVoxel one_voxel;
+  one_voxel.data = 1;
+  EXPECT_TRUE(test_utils::checkBlockAllConstant(block_ptr, one_voxel));
+  TestVoxel zero_voxel;
+  zero_voxel.data = 0;
+  EXPECT_FALSE(test_utils::checkBlockAllConstant(block_ptr, zero_voxel));
+}
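+
+// For reference, the allocation trait checked above keys on a static
+// allocate() like the one TsdfBlock exposes (signature assumed from its use
+// in this file; the test block types themselves live in nvblox/tests/blox.h):
+//
+//   static TsdfBlock::Ptr allocate(MemoryType memory_type);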
+
+TEST(BloxTest, ColorInitialization) {
+  // The color block has non-trivial initialization. Therefore we test this
+  // block specifically as a representative of the class of non-trivially
+  // constructed blocks.
+
+  // kUnified
+  constexpr float block_size_m = 0.1;
+  BlockLayer<ColorBlock> layer_unified(block_size_m, MemoryType::kUnified);
+  auto block_ptr = layer_unified.allocateBlockAtIndex(Index3D(0, 0, 0));
+  auto check_color_voxels = [](const Index3D& index,
+                               const ColorVoxel* voxel_ptr) {
+    EXPECT_EQ(voxel_ptr->color, Color::Gray());
+    EXPECT_EQ(voxel_ptr->weight, 0.0f);
+  };
+  callFunctionOnAllVoxels(*block_ptr, check_color_voxels);
+
+  // kDevice
+  BlockLayer<ColorBlock> layer_device(block_size_m, MemoryType::kDevice);
+  auto block_device_ptr = layer_device.allocateBlockAtIndex(Index3D(0, 0, 0));
+  ColorVoxel gray_voxel;
+  gray_voxel.color.r = 127;
+  gray_voxel.color.g = 127;
+  gray_voxel.color.b = 127;
+  gray_voxel.weight = 0.0f;
+  EXPECT_TRUE(test_utils::checkBlockAllConstant(block_device_ptr, gray_voxel));
+  ColorVoxel zero_voxel;
+  zero_voxel.color.r = 0;
+  zero_voxel.color.g = 0;
+  zero_voxel.color.b = 0;
+  zero_voxel.weight = 0.0f;
+  EXPECT_FALSE(test_utils::checkBlockAllConstant(block_device_ptr, zero_voxel));
+}
+
+int main(int argc, char** argv) {
+  google::InitGoogleLogging(argv[0]);
+  FLAGS_alsologtostderr = true;
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/nvblox/tests/test_cake.cpp b/nvblox/tests/test_cake.cpp
new file mode 100644
index 00000000..f4373734
--- /dev/null
+++ b/nvblox/tests/test_cake.cpp
@@ -0,0 +1,120 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "nvblox/core/blox.h"
+#include "nvblox/core/common_names.h"
+#include "nvblox/core/layer.h"
+#include "nvblox/core/layer_cake.h"
+#include "nvblox/core/voxels.h"
+
+#include "nvblox/tests/blox.h"
+
+using namespace nvblox;
+
+TEST(LayerCakeTest, addAndRetrieve) {
+  // Create
+  const float voxel_size = 0.1f;
+  LayerCake cake(voxel_size);
+
+  // Add a TsdfLayer with a block
+  TsdfLayer* tsdf_layer_ptr_1 = cake.add<TsdfLayer>(MemoryType::kUnified);
+  auto block_ptr_1 = tsdf_layer_ptr_1->allocateBlockAtIndex(Index3D(0, 0, 0));
+  block_ptr_1->voxels[0][0][0].distance = 1.0f;
+
+  // Retrieve
+  ASSERT_TRUE(cake.exists<TsdfLayer>());
+  const TsdfLayer* tsdf_layer_ptr_2 = cake.getConstPtr<TsdfLayer>();
+  EXPECT_TRUE(tsdf_layer_ptr_2->isBlockAllocated(Index3D(0, 0, 0)));
+  auto block_ptr_2 = tsdf_layer_ptr_2->getBlockAtIndex(Index3D(0, 0, 0));
+  EXPECT_EQ(block_ptr_2->voxels[0][0][0].distance, 1.0f);
+}
+
+TEST(LayerCakeTest, create) {
+  // Bring in a custom test layer
+  using DummyLayer = BlockLayer;
+
+  // Create
+  const float voxel_size = 0.1f;
+  auto cake = LayerCake::create(
+      voxel_size, MemoryType::kUnified);
+
+  // Checks
+  EXPECT_TRUE(cake.exists());
+  EXPECT_TRUE(cake.exists());
+  EXPECT_TRUE(cake.exists());
+  EXPECT_TRUE(cake.exists());
+
+  EXPECT_FALSE(cake.exists());
+}
+
+TEST(LayerCakeTest, moveOperations) {
+  const float voxel_size = 0.1f;
+  LayerCake cake_1 =
+      LayerCake::create(voxel_size, MemoryType::kDevice);
+
+  LayerCake cake_2 =
+      LayerCake::create(voxel_size, MemoryType::kDevice);
+
+  cake_1 = std::move(cake_2);
+
+  EXPECT_FALSE(cake_1.exists());
+  EXPECT_FALSE(cake_1.exists());
+  EXPECT_TRUE(cake_1.exists());
+  EXPECT_TRUE(cake_1.exists());
+
+  EXPECT_FALSE(cake_2.exists());
+  EXPECT_FALSE(cake_2.exists());
+  EXPECT_FALSE(cake_2.exists());
+  EXPECT_FALSE(cake_2.exists());
+}
+
+TEST(LayerCakeTest, voxelAndBlockSize) {
+  const float voxel_size = 0.1f;
+  LayerCake cake =
+      LayerCake::create<TsdfLayer>(voxel_size, MemoryType::kDevice);
+
+  const float expected_block_size = voxel_size * TsdfBlock::kVoxelsPerSide;
+
+  EXPECT_EQ(cake.getPtr<TsdfLayer>()->voxel_size(), 0.1f);
+  EXPECT_EQ(cake.getPtr<TsdfLayer>()->block_size(), expected_block_size);
+}
+
+TEST(LayerCakeTest, differentMemoryTypes) {
+  const float voxel_size = 0.1f;
+
+  LayerCake cake =
+      LayerCake::create<TsdfLayer, ColorLayer, EsdfLayer, MeshLayer>(
+          voxel_size, MemoryType::kDevice, MemoryType::kUnified,
+          MemoryType::kHost, MemoryType::kDevice);
+
+  EXPECT_EQ(cake.get<TsdfLayer>().memory_type(), MemoryType::kDevice);
+  EXPECT_EQ(cake.get<ColorLayer>().memory_type(), MemoryType::kUnified);
+  EXPECT_EQ(cake.get<EsdfLayer>().memory_type(), MemoryType::kHost);
+  EXPECT_EQ(cake.get<MeshLayer>().memory_type(), MemoryType::kDevice);
+}
+
+int main(int argc, char** argv) {
+  FLAGS_alsologtostderr = true;
+  google::InitGoogleLogging(argv[0]);
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/nvblox/tests/test_camera.cpp b/nvblox/tests/test_camera.cpp
new file mode 100644
index 00000000..68204968
--- /dev/null
+++ b/nvblox/tests/test_camera.cpp
@@ -0,0 +1,353 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+
+#include "nvblox/core/bounding_boxes.h"
+#include "nvblox/core/camera.h"
+#include "nvblox/core/types.h"
+
+#include "nvblox/tests/utils.h"
+
+using namespace nvblox;
+
+// TODO: Decide where to put test epsilons
+// NOTE(alexmillane): I had to crank this up slightly to get things to pass...
+// I guess this is just floating point errors accumulating?
+constexpr float kFloatEpsilon = 1e-4;
+
+std::pair<Vector3f, Vector2f> getRandomVisibleRayAndImagePoint(
+    const Camera& camera) {
+  // Random point on the image plane
+  const Vector2f u_C(test_utils::randomFloatInRange(
+                         0.0f, static_cast<float>(camera.width() - 1)),
+                     test_utils::randomFloatInRange(
+                         0.0f, static_cast<float>(camera.height() - 1)));
+  // Normalized ray
+  return {camera.rayFromImagePlaneCoordinates(u_C).normalized(), u_C};
+}
+
+Camera getTestCamera() {
+  // Arbitrary camera
+  constexpr float fu = 300;
+  constexpr float fv = 300;
+  constexpr int width = 640;
+  constexpr int height = 480;
+  constexpr float cu = static_cast<float>(width) / 2.0f;
+  constexpr float cv = static_cast<float>(height) / 2.0f;
+  return Camera(fu, fv, cu, cv, width, height);
+}
+
+TEST(CameraTest, PointsInView) {
+  // Make sure this is deterministic.
+  std::srand(0);
+
+  const Camera camera = getTestCamera();
+
+  // Generate some random points (in view) and project them back
+  constexpr int kNumPoints = 1000;
+  for (int i = 0; i < kNumPoints; i++) {
+    Vector3f ray_C;
+    Vector2f u_C;
+    std::tie(ray_C, u_C) = getRandomVisibleRayAndImagePoint(camera);
+    const Vector3f p_C = test_utils::randomFloatInRange(1.0, 1000.0) * ray_C;
+    Vector2f u_reprojection_C;
+    EXPECT_TRUE(camera.project(p_C, &u_reprojection_C));
+    EXPECT_TRUE(((u_reprojection_C - u_C).array().abs() < kFloatEpsilon).all());
+  }
+}
+
+TEST(CameraTest, CenterPixel) {
+  // Make sure this is deterministic.
+  std::srand(0);
+
+  const Camera camera = getTestCamera();
+
+  // Center
+  const Vector3f center_ray = Vector3f(0.0f, 0.0f, 1.0f);
+  const Vector3f p_C = test_utils::randomFloatInRange(1.0, 1000.0) * center_ray;
+  Eigen::Vector2f u;
+  EXPECT_TRUE(camera.project(p_C, &u));
+  EXPECT_TRUE(
+      ((u - Vector2f(camera.cu(), camera.cv())).array().abs() < kFloatEpsilon)
+          .all());
+}
+
+TEST(CameraTest, BehindCamera) {
+  // Make sure this is deterministic.
+  std::srand(0);
+
+  const Camera camera = getTestCamera();
+
+  constexpr int kNumPoints = 1000;
+  for (int i = 0; i < kNumPoints; i++) {
+    Vector3f ray_C;
+    Vector2f u_C;
+    std::tie(ray_C, u_C) = getRandomVisibleRayAndImagePoint(camera);
+    Vector3f p_C = test_utils::randomFloatInRange(1.0, 1000.0) * ray_C;
+    // The negation here puts the point behind the camera.
+    p_C.z() = -1.0f * p_C.z();
+    Vector2f u_reprojection_C;
+    EXPECT_FALSE(camera.project(p_C, &u_reprojection_C));
+  }
+}
+
+TEST(CameraTest, OutsideImagePlane) {
+  // Make sure this is deterministic.
+  std::srand(0);
+
+  const Camera camera = getTestCamera();
+
+  constexpr int kNumPoints = 1000;
+  for (int i = 0; i < kNumPoints; i++) {
+    // Random point off the image plane
+    // Add a random offset to the center pixel with sufficient magnitude to
+    // take it off the plane.
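+    // (Eg with width = 640 the x-offset is drawn from [320, 3200] px, so the
+    // perturbed pixel lands at least half an image width from the center and
+    // is guaranteed to be off the plane.)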
+    constexpr float kOffImagePlaneFactor = 5.0;
+    const Vector2f u_perturbation_C(
+        test_utils::randomSign() *
+            test_utils::randomFloatInRange(
+                camera.width() / 2.0, kOffImagePlaneFactor * camera.width()),
+        test_utils::randomSign() *
+            test_utils::randomFloatInRange(
+                camera.height() / 2.0, kOffImagePlaneFactor * camera.height()));
+    const Vector2f u_C = Vector2f(camera.cu(), camera.cv()) + u_perturbation_C;
+
+    // NOTE(alexmillane): A local ray-from-pixel helper, so that we don't
+    // trigger the checks, because the pixel is off the image plane.
+    const auto rayFromPixelNoChecks = [camera](const auto& u_C) {
+      return Vector3f((u_C[0] - camera.cu()) / camera.fu(),
+                      (u_C[1] - camera.cv()) / camera.fv(), 1.0f);
+    };
+    const Vector3f ray_C = rayFromPixelNoChecks(u_C);
+    const Vector3f p_C = test_utils::randomFloatInRange(1.0, 1000.0) * ray_C;
+    Vector2f u_reprojection_C;
+    EXPECT_FALSE(camera.project(p_C, &u_reprojection_C));
+  }
+}
+
+TEST(CameraTest, AxisAlignedBoundingBox) {
+  // Make sure this is deterministic.
+  std::srand(0);
+
+  const Camera camera = getTestCamera();
+
+  // Rays through the corners of the image plane
+  const Vector3f ray_0_C =
+      camera.rayFromImagePlaneCoordinates(Vector2f(0.0f, 0.0f));
+  const Vector3f ray_2_C =
+      camera.rayFromImagePlaneCoordinates(Vector2f(0.0f, camera.height()));
+  const Vector3f ray_1_C =
+      camera.rayFromImagePlaneCoordinates(Vector2f(camera.width(), 0.0f));
+  const Vector3f ray_3_C = camera.rayFromImagePlaneCoordinates(
+      Vector2f(camera.width(), camera.height()));
+
+  // Generate random depths
+  constexpr float kMinimumDepthPx = 1.0;
+  constexpr float kMaximumDepthPx = 1000.0;
+  const float min_depth =
+      test_utils::randomFloatInRange(kMinimumDepthPx, kMaximumDepthPx);
+  const float max_depth =
+      test_utils::randomFloatInRange(kMinimumDepthPx, kMaximumDepthPx);
+
+  // True bounding box from the 3D points
+  AlignedVector<Vector3f> view_corners_C = {
+      min_depth * ray_0_C, max_depth * ray_0_C,  // NOLINT
+      min_depth * ray_1_C, max_depth * ray_1_C,  // NOLINT
+      min_depth * ray_2_C, max_depth * ray_2_C,  // NOLINT
+      min_depth * ray_3_C, max_depth * ray_3_C   // NOLINT
+  };
+  AxisAlignedBoundingBox aabb_true;
+  std::for_each(view_corners_C.begin(), view_corners_C.end(),
+                [&aabb_true](const Vector3f& p) { aabb_true.extend(p); });
+
+  // Bounding box approximated by the camera model.
+  // TODO(alexmillane): Only tested with the identity transform at the moment.
+  const Transform T_L_C = Transform::Identity();
+  const AxisAlignedBoundingBox aabb_test =
+      camera.getViewAABB(T_L_C, min_depth, max_depth);
+
+  EXPECT_TRUE(aabb_true.isApprox(aabb_test))
+      << "AABB true: " << aabb_true.min().transpose() << " "
+      << aabb_true.max().transpose()
+      << " AABB test: " << aabb_test.min().transpose() << " "
+      << aabb_test.max().transpose();
+}
+
+TEST(CameraTest, FrustumTest) {
+  constexpr float kMinDist = 1.0f;
+  constexpr float kMaxDist = 10.0f;
+
+  const Camera camera = getTestCamera();
+
+  Frustum frustum(camera, Transform::Identity(), kMinDist, kMaxDist);
+
+  // Project a point into the camera.
+  Vector3f point_C(0.5, 0.5, 5.0);
+  Vector2f u_C;
+  ASSERT_TRUE(camera.project(point_C, &u_C));
+
+  // Check that the point is within the frustum.
+  EXPECT_TRUE(frustum.isPointInView(point_C));
+
+  // Check a point further than the max dist.
+  point_C << 0.5, 0.5, kMaxDist + 10.0f;
+  ASSERT_TRUE(camera.project(point_C, &u_C));
+  EXPECT_FALSE(frustum.isPointInView(point_C));
+
+  // Check a point closer than the min dist.
+  point_C << 0.0, 0.0, 0.3f;
+  ASSERT_TRUE(camera.project(point_C, &u_C));
+  EXPECT_FALSE(frustum.isPointInView(point_C));
+}
+
+TEST(CameraTest, FrustumAABBTest) {
+  constexpr float kMinDist = 1.0f;
+  constexpr float kMaxDist = 10.0f;
+  constexpr int kVoxelsPerSide = VoxelBlock<TsdfVoxel>::kVoxelsPerSide;
+  const Camera camera = getTestCamera();
+
+  Frustum frustum(camera, Transform::Identity(), kMinDist, kMaxDist);
+  AxisAlignedBoundingBox view_aabb =
+      camera.getViewAABB(Transform::Identity(), kMinDist, kMaxDist);
+
+  // Double-check that the camera and the frustum AABB match.
+  EXPECT_TRUE(frustum.isAABBInView(view_aabb));
+  EXPECT_TRUE(view_aabb.isApprox(frustum.getAABB()));
+
+  // Get all blocks in the view AABB and make sure that some of them are
+  // actually in the view.
+  const float block_size = 1.0f;
+  std::vector<Index3D> block_indices_in_aabb =
+      getBlockIndicesTouchedByBoundingBox(block_size, view_aabb);
+  std::vector<Index3D> block_indices_in_frustum;
+  for (const Index3D& block_index : block_indices_in_aabb) {
+    const AxisAlignedBoundingBox& aabb_block =
+        getAABBOfBlock(block_size, block_index);
+    if (frustum.isAABBInView(aabb_block)) {
+      block_indices_in_frustum.push_back(block_index);
+    }
+  }
+
+  EXPECT_GT(block_indices_in_aabb.size(), block_indices_in_frustum.size());
+  EXPECT_GT(block_indices_in_frustum.size(), 0);
+
+  // Check all voxels within the view and make sure that they're correctly
+  // marked.
+  for (const Index3D& block_index : block_indices_in_aabb) {
+    Index3D voxel_index;
+
+    // Iterate over all the voxels:
+    for (voxel_index.x() = 0; voxel_index.x() < kVoxelsPerSide;
+         voxel_index.x()++) {
+      for (voxel_index.y() = 0; voxel_index.y() < kVoxelsPerSide;
+           voxel_index.y()++) {
+        for (voxel_index.z() = 0; voxel_index.z() < kVoxelsPerSide;
+             voxel_index.z()++) {
+          Vector3f position = getCenterPostionFromBlockIndexAndVoxelIndex(
+              block_size, block_index, voxel_index);
+          Eigen::Vector2f u_C;
+          bool in_frustum = frustum.isPointInView(position);
+          bool in_camera = camera.project(position, &u_C);
+          if (position.z() <= kMaxDist && position.z() >= kMinDist) {
+            EXPECT_EQ(in_frustum, in_camera);
+          } else {
+            // Outside the depth range the point must not be in the frustum,
+            // whether or not it projects into the camera.
+            EXPECT_FALSE(in_frustum);
+          }
+        }
+      }
+    }
+  }
+}
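+
+// Note on the AABB-based frustum tests: the view AABB is an axis-aligned and
+// therefore conservative bound on the frustum, so every block touching the
+// frustum is also in the AABB, but not vice versa. This is why the tests
+// expect strictly more blocks in the AABB than in the frustum.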
+
+TEST(CameraTest, FrustumAtLeastOneValidVoxelTest) {
+  constexpr float kMinDist = 0.0f;
+  constexpr float kMaxDist = 10.0f;
+  constexpr int kVoxelsPerSide = VoxelBlock<TsdfVoxel>::kVoxelsPerSide;
+  const Camera camera = getTestCamera();
+
+  Frustum frustum(camera, Transform::Identity(), kMinDist, kMaxDist);
+  AxisAlignedBoundingBox view_aabb =
+      camera.getViewAABB(Transform::Identity(), kMinDist, kMaxDist);
+
+  // Double-check that the camera and the frustum AABB match.
+  EXPECT_TRUE(frustum.isAABBInView(view_aabb));
+  EXPECT_TRUE(view_aabb.isApprox(frustum.getAABB()));
+
+  // Get all blocks in the view AABB and make sure that some of them are
+  // actually in the view.
+  const float block_size = 0.5f;
+  std::vector<Index3D> block_indices_in_aabb =
+      getBlockIndicesTouchedByBoundingBox(block_size, view_aabb);
+  std::vector<Index3D> block_indices_in_frustum;
+  for (const Index3D& block_index : block_indices_in_aabb) {
+    const AxisAlignedBoundingBox& aabb_block =
+        getAABBOfBlock(block_size, block_index);
+    if (frustum.isAABBInView(aabb_block)) {
+      block_indices_in_frustum.push_back(block_index);
+    }
+  }
+
+  EXPECT_GT(block_indices_in_aabb.size(), block_indices_in_frustum.size());
+  EXPECT_GT(block_indices_in_frustum.size(), 0);
+
+  // Check that for any given block in the frustum, there's AT LEAST one valid
+  // voxel.
+  int empty = 0;
+  for (const Index3D& block_index : block_indices_in_frustum) {
+    Index3D voxel_index;
+    bool any_valid = false;
+    // Iterate over all the voxels:
+    for (voxel_index.x() = 0; voxel_index.x() < kVoxelsPerSide;
+         voxel_index.x()++) {
+      for (voxel_index.y() = 0; voxel_index.y() < kVoxelsPerSide;
+           voxel_index.y()++) {
+        for (voxel_index.z() = 0; voxel_index.z() < kVoxelsPerSide;
+             voxel_index.z()++) {
+          Vector3f position = getCenterPostionFromBlockIndexAndVoxelIndex(
+              block_size, block_index, voxel_index);
+          Eigen::Vector2f u_C;
+          bool in_frustum = frustum.isPointInView(position);
+          bool in_camera = camera.project(position, &u_C);
+          any_valid = in_camera || any_valid;
+          if (position.z() >= 0.0f && position.z() < 1e-4f) {
+            // Skip points essentially on the camera plane.
+          } else if (position.z() <= kMaxDist && position.z() > kMinDist) {
+            EXPECT_EQ(in_frustum, in_camera);
+          } else {
+            // Outside the depth range the point must not be in the frustum,
+            // whether or not it projects into the camera.
+            EXPECT_FALSE(in_frustum);
+          }
+        }
+      }
+    }
+    if (!any_valid) {
+      empty++;
+    }
+  }
+  // At MOST 3% empty on the corners.
+  EXPECT_LE(static_cast<float>(empty) / block_indices_in_frustum.size(), 0.03);
+}
+
+int main(int argc, char** argv) {
+  FLAGS_alsologtostderr = true;
+  google::InitGoogleLogging(argv[0]);
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
\ No newline at end of file
diff --git a/nvblox/tests/test_color_image.cpp b/nvblox/tests/test_color_image.cpp
new file mode 100644
index 00000000..7204fa0c
--- /dev/null
+++ b/nvblox/tests/test_color_image.cpp
@@ -0,0 +1,228 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+
+#include "nvblox/core/image.h"
+#include "nvblox/core/interpolation_2d.h"
+#include "nvblox/datasets/image_loader.h"
+#include "nvblox/datasets/parse_3dmatch.h"
+#include "nvblox/io/csv.h"
+
+#include "nvblox/tests/gpu_image_routines.h"
+#include "nvblox/tests/interpolation_2d_gpu.h"
+#include "nvblox/tests/utils.h"
+
+using namespace nvblox;
+
+constexpr float kFloatEpsilon = 1e-4;
+
+TEST(ColorImageTest, SetConstant) {
+  // Set constant on the GPU
+  ColorImage image(480, 640, MemoryType::kUnified);
+  const Color color(255, 0, 0);
+  test_utils::setImageConstantOnGpu(color, &image);
+  // Check on the CPU
+  for (int row_idx = 0; row_idx < image.rows(); row_idx++) {
+    for (int col_idx = 0; col_idx < image.cols(); col_idx++) {
+      EXPECT_EQ(image(row_idx, col_idx), color);
+    }
+  }
+}
+
+TEST(ColorImageTest, NearbyImagesSimilar) {
+  // This tests that two consecutive images in the 3DMatch dataset have a
+  // small pixelwise difference.
+
+  // Load 3dmatch images
+  const std::string base_path = "./data/3dmatch";
+  constexpr int seq_id = 1;
+  ColorImage image_1;
+  EXPECT_TRUE(datasets::load8BitColorImage(
+      datasets::threedmatch::getPathForColorImage(base_path, seq_id, 0),
+      &image_1, MemoryType::kUnified));
+  ColorImage image_2;
+  EXPECT_TRUE(datasets::load8BitColorImage(
+      datasets::threedmatch::getPathForColorImage(base_path, seq_id, 1),
+      &image_2, MemoryType::kUnified));
+
+  // Compute the diff image on the GPU
+  ColorImage diff_image;
+  test_utils::getDifferenceImageOnGPU(image_1, image_2, &diff_image);
+
+  // Write the diff image
+  io::writeToCsv("./color_image_difference.csv", diff_image);
+
+  // Check that there's not much difference between the images using the CPU:
+  // - A difference of 50 pixel values constitutes a "big" difference for
+  //   this test.
+  // - We allow up to 10% of pixels to have a "big" difference.
+  constexpr float kBigDifferenceThreshold = 50.0;
+  constexpr float kAllowableBigDifferencePixelsRatio = 0.10;
+  CHECK_EQ(image_1.rows(), image_2.rows());
+  CHECK_EQ(image_1.cols(), image_2.cols());
+  int num_big_diff_pixels = 0;
+  for (int i = 0; i < image_2.numel(); i++) {
+    const float r_diff =
+        static_cast<float>(image_1(i).r) - static_cast<float>(image_2(i).r);
+    const float g_diff =
+        static_cast<float>(image_1(i).g) - static_cast<float>(image_2(i).g);
+    const float b_diff =
+        static_cast<float>(image_1(i).b) - static_cast<float>(image_2(i).b);
+    const float average_diff =
+        (std::fabs(r_diff) + std::fabs(g_diff) + std::fabs(b_diff)) / 3.0f;
+    if (average_diff > kBigDifferenceThreshold) {
+      ++num_big_diff_pixels;
+    }
+  }
+  const float big_diff_pixel_ratio = static_cast<float>(num_big_diff_pixels) /
+                                     static_cast<float>(image_1.numel());
+  std::cout << "num_big_diff_pixels: " << num_big_diff_pixels << std::endl;
+  std::cout << "big_diff_pixel_ratio: " << big_diff_pixel_ratio << std::endl;
+  EXPECT_LT(big_diff_pixel_ratio, kAllowableBigDifferencePixelsRatio);
+}
+
+TEST(ColorImageTest, InterpolationNearestNeighbour) {
+  // Tiny image with the col index in the g channel and the row index in r.
+  ColorImage image(2, 2, MemoryType::kUnified);
+  image(0, 0) = Color(0, 0, 0);
+  image(0, 1) = Color(0, 1, 0);
+  image(1, 0) = Color(1, 0, 0);
+  image(1, 1) = Color(1, 1, 0);
+
+  // Note the switching in index order below. This is because images are set:
+  //   image(row, col) = val
+  // but interpolated:
+  //   val = image(u_px(x, y)) = image(y, x)
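+  // (Eg sampling at u_px = (0.6, 0.4) reads image(0, 1), i.e. row 0 / col 1,
+  // giving r = 0 and g = 1, which is what the checks below assert.)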
+  Color color;
+  EXPECT_TRUE(interpolation::interpolate2D(
+      image, Vector2f(0.4, 0.4), &color, InterpolationType::kNearestNeighbor));
+  EXPECT_EQ(color.r, 0);
+  EXPECT_EQ(color.g, 0);
+  EXPECT_TRUE(interpolation::interpolate2D(
+      image, Vector2f(0.4, 0.6), &color, InterpolationType::kNearestNeighbor));
+  EXPECT_EQ(color.r, 1);
+  EXPECT_EQ(color.g, 0);
+  EXPECT_TRUE(interpolation::interpolate2D(
+      image, Vector2f(0.6, 0.4), &color, InterpolationType::kNearestNeighbor));
+  EXPECT_EQ(color.r, 0);
+  EXPECT_EQ(color.g, 1);
+  EXPECT_TRUE(interpolation::interpolate2D(
+      image, Vector2f(0.6, 0.6), &color, InterpolationType::kNearestNeighbor));
+  EXPECT_EQ(color.r, 1);
+  EXPECT_EQ(color.g, 1);
+
+  // Out of bounds
+  EXPECT_FALSE(interpolation::interpolate2D(
+      image, Vector2f(-0.6, 0.0), &color, InterpolationType::kNearestNeighbor));
+  EXPECT_FALSE(interpolation::interpolate2D(
+      image, Vector2f(0.0, -0.6), &color, InterpolationType::kNearestNeighbor));
+  EXPECT_FALSE(
+      interpolation::interpolate2D(image, Vector2f(-0.6, -0.6), &color,
+                                   InterpolationType::kNearestNeighbor));
+  EXPECT_FALSE(interpolation::interpolate2D(
+      image, Vector2f(1.6, 0.0), &color, InterpolationType::kNearestNeighbor));
+  EXPECT_FALSE(interpolation::interpolate2D(
+      image, Vector2f(0.0, 1.6), &color, InterpolationType::kNearestNeighbor));
+  EXPECT_FALSE(interpolation::interpolate2D(
+      image, Vector2f(1.6, 1.6), &color, InterpolationType::kNearestNeighbor));
+}
+
+TEST(ColorImageTest, InterpolationLinear) {
+  // Tiny image with the col index in the g channel and the row index in r,
+  // times 10.
+  ColorImage image(2, 2, MemoryType::kUnified);
+  image(0, 0) = Color(5, 5, 0);
+  image(0, 1) = Color(5, 15, 0);
+  image(1, 0) = Color(15, 5, 0);
+  image(1, 1) = Color(15, 15, 0);
+
+  Color color;
+  EXPECT_TRUE(interpolation::interpolate2D(image, Vector2f(0.5, 0.5), &color,
+                                           InterpolationType::kLinear));
+  EXPECT_EQ(color.r, 5);
+  EXPECT_EQ(color.g, 5);
+
+  EXPECT_TRUE(interpolation::interpolate2D(image, Vector2f(0.4, 0.4), &color,
+                                           InterpolationType::kLinear));
+  EXPECT_EQ(color.r, 4);
+  EXPECT_EQ(color.g, 4);
+  EXPECT_TRUE(interpolation::interpolate2D(image, Vector2f(0.4, 0.6), &color,
+                                           InterpolationType::kLinear));
+  EXPECT_EQ(color.r, 6);
+  EXPECT_EQ(color.g, 4);
+  EXPECT_TRUE(interpolation::interpolate2D(image, Vector2f(0.6, 0.4), &color,
+                                           InterpolationType::kLinear));
+  EXPECT_EQ(color.r, 4);
+  EXPECT_EQ(color.g, 6);
+  EXPECT_TRUE(interpolation::interpolate2D(image, Vector2f(0.6, 0.6), &color,
+                                           InterpolationType::kLinear));
+  EXPECT_EQ(color.r, 6);
+  EXPECT_EQ(color.g, 6);
+
+  // Out of bounds
+  EXPECT_FALSE(interpolation::interpolate2D(image, Vector2f(-0.6, 0.0), &color,
+                                            InterpolationType::kLinear));
+  EXPECT_FALSE(interpolation::interpolate2D(image, Vector2f(0.0, -0.6), &color,
+                                            InterpolationType::kLinear));
+  EXPECT_FALSE(interpolation::interpolate2D(image, Vector2f(-0.6, -0.6), &color,
+                                            InterpolationType::kLinear));
+  EXPECT_FALSE(interpolation::interpolate2D(image, Vector2f(1.6, 0.0), &color,
+                                            InterpolationType::kLinear));
+  EXPECT_FALSE(interpolation::interpolate2D(image, Vector2f(0.0, 1.6), &color,
+                                            InterpolationType::kLinear));
+  EXPECT_FALSE(interpolation::interpolate2D(image, Vector2f(1.6, 1.6), &color,
+                                            InterpolationType::kLinear));
+}
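+
+// For the linear test above: with pixel centers at half-integer coordinates,
+// the sampled value along one axis works out to 10 * u, eg at u = 0.6 the two
+// columns contribute 0.9 * 5 + 0.1 * 15 = 6, matching the expected g value.
+// (The half-integer-center convention is inferred from the expected values,
+// not from the interpolation code itself.)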
x10 + ColorImage image(2, 2, MemoryType::kUnified); + image(0, 0) = Color(5, 5, 0); + image(0, 1) = Color(5, 15, 0); + image(1, 0) = Color(15, 5, 0); + image(1, 1) = Color(15, 15, 0); + + constexpr int kNumTests = 1000; + std::vector<Eigen::Vector2f> u_px_vec; + u_px_vec.reserve(kNumTests); + for (int i = 0; i < kNumTests; i++) { + u_px_vec.push_back( + Eigen::Vector2f(test_utils::randomFloatInRange(0.5f, 1.5f), + test_utils::randomFloatInRange(0.5f, 1.5f))); + } + + std::vector<Color> values(kNumTests, Color(0, 0, 0)); + std::vector<int> success_flags(kNumTests, 0); + test_utils::linearInterpolateImageGpu(image, u_px_vec, &values, + &success_flags); + + for (int i = 0; i < kNumTests; i++) { + EXPECT_TRUE(success_flags[i] == 1); + const uint8_t x = + static_cast<uint8_t>(std::round((u_px_vec[i].x() * 10.0f))); + const uint8_t y = + static_cast<uint8_t>(std::round((u_px_vec[i].y() * 10.0f))); + EXPECT_NEAR(values[i].g, x, kFloatEpsilon); + EXPECT_NEAR(values[i].r, y, kFloatEpsilon); + } +} + +int main(int argc, char** argv) { + FLAGS_alsologtostderr = true; + google::InitGoogleLogging(argv[0]); + google::InstallFailureSignalHandler(); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/nvblox/tests/test_color_integrator.cpp b/nvblox/tests/test_color_integrator.cpp new file mode 100644 index 00000000..8336554c --- /dev/null +++ b/nvblox/tests/test_color_integrator.cpp @@ -0,0 +1,479 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include <gtest/gtest.h> + +#include "nvblox/core/accessors.h" +#include "nvblox/core/blox.h" +#include "nvblox/core/bounding_boxes.h" +#include "nvblox/core/common_names.h" +#include "nvblox/core/interpolation_3d.h" +#include "nvblox/core/layer.h" +#include "nvblox/core/voxels.h" +#include "nvblox/integrators/projective_color_integrator.h" +#include "nvblox/integrators/projective_tsdf_integrator.h" +#include "nvblox/io/mesh_io.h" +#include "nvblox/mesh/mesh_integrator.h" +#include "nvblox/primitives/primitives.h" +#include "nvblox/primitives/scene.h" + +#include "nvblox/tests/gpu_image_routines.h" +#include "nvblox/tests/utils.h" + +using namespace nvblox; + +class ColorIntegrationTest : public ::testing::Test { + protected: + ColorIntegrationTest() + : kSphereCenter(Vector3f(0.0f, 0.0f, 2.0f)), + gt_layer_(voxel_size_m_, MemoryType::kUnified), + camera_(Camera(fu_, fv_, cu_, cv_, width_, height_)) { + // Maximum distance to consider for scene generation. + constexpr float kMaxDist = 10.0; + constexpr float kMinWeight = 1.0; + + // Tolerance for error. + constexpr float kDistanceErrorTolerance = truncation_distance_m_; + + // Scene is bounded to -5, -5, 0 to 5, 5, 5. + scene.aabb() = AxisAlignedBoundingBox(Vector3f(-5.0f, -5.0f, 0.0f), + Vector3f(5.0f, 5.0f, 5.0f)); + // Create a scene with a ground plane and a sphere. + scene.addGroundLevel(0.0f); + scene.addCeiling(5.0f); + scene.addPrimitive( + std::make_unique<primitives::Sphere>(Vector3f(0.0f, 0.0f, 2.0f), 2.0f)); + // Add bounding planes at 5 meters. Basically makes it a sphere in a box.
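+ // (Why the closed box matters: generateSdfFromScene() fills every voxel in + // the scene AABB with the distance to the nearest primitive, so bounding + // the scene on all sides gives every ground-truth voxel a well-defined + // nearest surface with no unobserved holes.)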
+ scene.addPlaneBoundaries(-5.0f, 5.0f, -5.0f, 5.0f); + + // Get the ground truth SDF for it. + scene.generateSdfFromScene(truncation_distance_m_, &gt_layer_); + } + + // Scenes + constexpr static float kSphereRadius = 2.0f; + const Vector3f kSphereCenter; + + // Test layer + constexpr static float voxel_size_m_ = 0.1; + constexpr static float block_size_m_ = + VoxelBlock<TsdfVoxel>::kVoxelsPerSide * voxel_size_m_; + TsdfLayer gt_layer_; + + // Truncation distance + constexpr static float truncation_distance_vox_ = 4; + constexpr static float truncation_distance_m_ = + truncation_distance_vox_ * voxel_size_m_; + + // Test camera + constexpr static float fu_ = 300; + constexpr static float fv_ = 300; + constexpr static int width_ = 640; + constexpr static int height_ = 480; + constexpr static float cu_ = static_cast<float>(width_) / 2.0f; + constexpr static float cv_ = static_cast<float>(height_) / 2.0f; + Camera camera_; + + // Test Scene + primitives::Scene scene; +}; + +// ProjectiveTsdfIntegrator child that gives the tests access to the internal +// functions. +class TestProjectiveColorIntegratorGPU : public ProjectiveColorIntegrator { + public: + TestProjectiveColorIntegratorGPU() : ProjectiveColorIntegrator() {} + FRIEND_TEST(ColorIntegrationTest, TruncationBandTest); +}; + +ColorImage generateSolidColorImage(const Color& color, const int height, + const int width) { + // Generate a solid-color image on the GPU. + ColorImage image(height, width); + nvblox::test_utils::setImageConstantOnGpu(color, &image); + return image; +} + +std::vector<Eigen::Vector3f> getPointsOnASphere(const float radius, + const Eigen::Vector3f& center, + const int points_per_rad = 10) { + std::vector<Eigen::Vector3f> sphere_points; + for (int azimuth_idx = 0; azimuth_idx < 2 * points_per_rad; azimuth_idx++) { + for (int elevation_idx = 0; elevation_idx < points_per_rad; + elevation_idx++) { + const float azimuth = azimuth_idx * M_PI / points_per_rad - M_PI; + const float elevation = + elevation_idx * M_PI / points_per_rad - M_PI / 2.0f; + Eigen::Vector3f p = + radius * Eigen::Vector3f(cos(azimuth) * sin(elevation), + sin(azimuth) * sin(elevation), + cos(elevation)); + p += center; + sphere_points.push_back(p); + } + } + return sphere_points; +} + +float checkSphereColor(const ColorLayer& color_layer, const Vector3f& center, + const float radius, const Color& color) { + // Check that each sphere is colored appropriately (if observed) + int num_observed = 0; + int num_tested = 0; + auto check_color = [&num_tested, &num_observed]( + const ColorVoxel& voxel, + const Color& color_2) -> void { + ++num_tested; + if (voxel.weight >= 1.0f) { + EXPECT_EQ(voxel.color, color_2); + ++num_observed; + } + }; + + const std::vector<Eigen::Vector3f> sphere_points = + getPointsOnASphere(radius, center); + for (const Vector3f& p : sphere_points) { + const ColorVoxel* color_voxel; + EXPECT_TRUE(getVoxelAtPosition(color_layer, p, &color_voxel)); + check_color(*color_voxel, color); + } + + const float ratio_observed_points = + static_cast<float>(num_observed) / static_cast<float>(num_tested); + return ratio_observed_points; +} + +TEST_F(ColorIntegrationTest, TruncationBandTest) { + // Check the GPU version against a hand-rolled CPU implementation. + TestProjectiveColorIntegratorGPU integrator; + + // The distance from the surface that we "pass" blocks within.
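+ // (In other words: a block should survive the reduction if and only if at + // least one of its TSDF voxels lies within +/-kTestDistance of the surface. + // The two loops further down verify exactly that, block by block.)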
+ constexpr float kTestDistance = voxel_size_m_; + + std::vector all_indices = gt_layer_.getAllBlockIndices(); + std::vector valid_indices = + integrator.reduceBlocksToThoseInTruncationBand(all_indices, gt_layer_, + kTestDistance); + integrator.finish(); + + // Horrible N^2 complexity set_difference implementation. But easy to write :) + std::vector not_valid_indices; + for (const Index3D& idx : all_indices) { + if (std::find(valid_indices.begin(), valid_indices.end(), idx) == + valid_indices.end()) { + not_valid_indices.push_back(idx); + } + } + + // Check indices touching band + for (const Index3D& idx : valid_indices) { + const auto block_ptr = gt_layer_.getBlockAtIndex(idx); + bool touches_band = false; + auto touches_band_lambda = [&touches_band, kTestDistance]( + const Index3D& voxel_index, + const TsdfVoxel* voxel) -> void { + if (std::abs(voxel->distance) <= kTestDistance) { + touches_band = true; + } + }; + callFunctionOnAllVoxels(*block_ptr, touches_band_lambda); + EXPECT_TRUE(touches_band); + } + + // Check indices NOT touching band + for (const Index3D& idx : not_valid_indices) { + const auto block_ptr = gt_layer_.getBlockAtIndex(idx); + bool touches_band = false; + auto touches_band_lambda = [&touches_band, kTestDistance]( + const Index3D& voxel_index, + const TsdfVoxel* voxel) -> void { + if (std::abs(voxel->distance) <= kTestDistance) { + touches_band = true; + } + }; + callFunctionOnAllVoxels(*block_ptr, touches_band_lambda); + EXPECT_FALSE(touches_band); + } +} + +TEST_F(ColorIntegrationTest, IntegrateColorToGroundTruthDistanceField) { + // Create an integrator. + ProjectiveColorIntegrator color_integrator; + + // Simulate a trajectory of the requisite amount of points, on the circle + // around the sphere. + constexpr float kTrajectoryRadius = 4.0f; + constexpr float kTrajectoryHeight = 2.0f; + constexpr int kNumTrajectoryPoints = 80; + const float radians_increment = 2 * M_PI / (kNumTrajectoryPoints); + + // Color layer + ColorLayer color_layer(voxel_size_m_, MemoryType::kUnified); + + // Generate a random color for this scene + const Color color = Color::Red(); + const ColorImage image = generateSolidColorImage(color, height_, width_); + + // Set keeping track of which blocks were touched during the test + Index3DSet touched_blocks; + + for (size_t i = 0; i < kNumTrajectoryPoints; i++) { + const float theta = radians_increment * i; + // Convert polar to cartesian coordinates. + Vector3f cartesian_coordinates(kTrajectoryRadius * std::cos(theta), + kTrajectoryRadius * std::sin(theta), + kTrajectoryHeight); + // The camera has its z axis pointing towards the origin. + Eigen::Quaternionf rotation_base(0.5, 0.5, 0.5, 0.5); + Eigen::Quaternionf rotation_theta( + Eigen::AngleAxisf(M_PI + theta, Vector3f::UnitZ())); + + // Construct a transform from camera to scene with this. 
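+ // (Eigen note: prerotate()/pretranslate() compose on the left, so after the + // two calls below the transform is T_S_C = translate(p) * rotation_theta * + // rotation_base: points are rotated into the scene frame first, then + // shifted to the camera position on the circle.)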
+ Transform T_S_C = Transform::Identity(); + T_S_C.prerotate(rotation_theta * rotation_base); + T_S_C.pretranslate(cartesian_coordinates); + + // Generate an image with a single color + std::vector updated_blocks; + color_integrator.integrateFrame(image, T_S_C, camera_, gt_layer_, + &color_layer, &updated_blocks); + // Accumulate touched block indices + std::copy(updated_blocks.begin(), updated_blocks.end(), + std::inserter(touched_blocks, touched_blocks.end())); + } + + // Lambda that checks if voxels have the passed color (if they have weight > + // 0) + auto color_check_lambda = [&color](const Index3D& voxel_idx, + const ColorVoxel* voxel) -> void { + if (voxel->weight > 0.0f) { + EXPECT_EQ(voxel->color, color); + } + }; + + // Check that all touched blocks are the color we chose + for (const Index3D& block_idx : touched_blocks) { + callFunctionOnAllVoxels(*color_layer.getBlockAtIndex(block_idx), + color_check_lambda); + } + + // Check that most points on the surface of the sphere have been observed + int num_points_on_sphere_surface_observed = 0; + const std::vector sphere_points = + getPointsOnASphere(kSphereRadius, kSphereCenter); + const int num_surface_points_tested = sphere_points.size(); + for (const Vector3f p : sphere_points) { + const ColorVoxel* color_voxel; + EXPECT_TRUE(getVoxelAtPosition(color_layer, p, &color_voxel)); + if (color_voxel->weight >= 1.0f) { + ++num_points_on_sphere_surface_observed; + } + } + const float ratio_observed_surface_points = + static_cast(num_points_on_sphere_surface_observed) / + static_cast(num_surface_points_tested); + std::cout << "num_points_on_sphere_surface_observed: " + << num_points_on_sphere_surface_observed << std::endl; + std::cout << "num_surface_points_tested: " << num_surface_points_tested + << std::endl; + std::cout << "ratio_observed_surface_points: " + << ratio_observed_surface_points << std::endl; + EXPECT_GT(ratio_observed_surface_points, 0.5); + + // Check that all color blocks have a corresponding block in the tsdf layer + for (const Index3D block_idx : color_layer.getAllBlockIndices()) { + EXPECT_NE(gt_layer_.getBlockAtIndex(block_idx), nullptr); + } + + // Generate a mesh from the "reconstruction" + MeshIntegrator mesh_integrator; + BlockLayer mesh_layer(block_size_m_, MemoryType::kDevice); + EXPECT_TRUE( + mesh_integrator.integrateMeshFromDistanceField(gt_layer_, &mesh_layer)); + mesh_integrator.colorMesh(color_layer, &mesh_layer); + + // Write to file + io::outputMeshLayerToPly(mesh_layer, "color_sphere_mesh.ply"); +} + +TEST_F(ColorIntegrationTest, ColoredSpheres) { + constexpr float kTruncationDistanceVox = 2; + constexpr float truncation_distance_m_ = + kTruncationDistanceVox * voxel_size_m_; + + primitives::Scene scene_2; + + constexpr float kSphereRadius = 2.0f; + const Eigen::Vector3f center_1 = Vector3f(5.0f, 0.0f, 0.0f); + const Eigen::Vector3f center_2 = Vector3f(5.0f, 5.0f, 0.0f); + const Eigen::Vector3f center_3 = Vector3f(5.0f, 10.0f, 0.0f); + + // Scene is bounded to -5, -5, 0 to 5, 5, 5. + scene_2.aabb() = AxisAlignedBoundingBox(Vector3f(-5.0f, -5.0f, -5.0f), + Vector3f(10.0f, 15.0f, 5.0f)); + // Create a scene with a ground plane and a sphere. + scene_2.addPrimitive( + std::make_unique(center_1, kSphereRadius)); + scene_2.addPrimitive( + std::make_unique(center_2, kSphereRadius)); + scene_2.addPrimitive( + std::make_unique(center_3, kSphereRadius)); + + // Camera + // Slightly narrow field of view so each image below sees only a single + // sphere. 
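+ // (Sanity arithmetic, not part of the test: with fu = 450 and a 640 px wide + // image, the horizontal half-angle is atan(320 / 450) ~ 35 deg, i.e. ~71 deg + // full FOV, versus ~94 deg for the fu = 300 fixture camera. The spheres sit + // 5 m apart laterally at 5 m range (45 deg off-axis), so the narrower FOV + // keeps exactly one sphere per view.)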
+ constexpr float fu = 450; + constexpr float fv = 450; + constexpr int width = 640; + constexpr int height = 480; + constexpr float cu = static_cast<float>(width) / 2.0f; + constexpr float cv = static_cast<float>(height) / 2.0f; + Camera camera(fu, fv, cu, cv, width, height); + + // Simulate camera views + // Rotate 90 degrees around the y axis to point camera-Z along world-X + Transform T_S_C1 = Transform::Identity(); + T_S_C1.prerotate( + Eigen::Quaternionf(Eigen::AngleAxisf(M_PI / 2, Vector3f::UnitY()))); + T_S_C1.pretranslate(Eigen::Vector3f(0.0f, 0.0f, 0.0f)); + Transform T_S_C2 = Transform::Identity(); + T_S_C2.prerotate( + Eigen::Quaternionf(Eigen::AngleAxisf(M_PI / 2, Vector3f::UnitY()))); + T_S_C2.pretranslate(Eigen::Vector3f(0.0f, 5.0f, 0.0f)); + Transform T_S_C3 = Transform::Identity(); + T_S_C3.prerotate( + Eigen::Quaternionf(Eigen::AngleAxisf(M_PI / 2, Vector3f::UnitY()))); + T_S_C3.pretranslate(Eigen::Vector3f(0.0f, 10.0f, 0.0f)); + + // TSDF + scene_2.generateSdfFromScene(truncation_distance_m_, &gt_layer_); + + // Color layer + ProjectiveColorIntegrator color_integrator; + ColorLayer color_layer(voxel_size_m_, MemoryType::kUnified); + + const auto color_1 = Color::Red(); + const auto color_2 = Color::Green(); + const auto color_3 = Color::Blue(); + + const auto image_1 = generateSolidColorImage(color_1, height_, width_); + const auto image_2 = generateSolidColorImage(color_2, height_, width_); + const auto image_3 = generateSolidColorImage(color_3, height_, width_); + + std::vector<Index3D> updated_blocks; + color_integrator.integrateFrame(image_1, T_S_C1, camera, gt_layer_, + &color_layer, &updated_blocks); + color_integrator.integrateFrame(image_2, T_S_C2, camera, gt_layer_, + &color_layer, &updated_blocks); + color_integrator.integrateFrame(image_3, T_S_C3, camera, gt_layer_, + &color_layer, &updated_blocks); + + // Generate a mesh from the "reconstruction" + MeshIntegrator mesh_integrator; + BlockLayer<MeshBlock> mesh_layer(block_size_m_, MemoryType::kUnified); + EXPECT_TRUE( + mesh_integrator.integrateMeshFromDistanceField(gt_layer_, &mesh_layer)); + mesh_integrator.colorMesh(color_layer, &mesh_layer); + + const float sphere_1_observed_ratio = + checkSphereColor(color_layer, center_1, kSphereRadius, color_1); + const float sphere_2_observed_ratio = + checkSphereColor(color_layer, center_2, kSphereRadius, color_2); + const float sphere_3_observed_ratio = + checkSphereColor(color_layer, center_3, kSphereRadius, color_3); + + EXPECT_GT(sphere_1_observed_ratio, 0.2); + EXPECT_GT(sphere_2_observed_ratio, 0.2); + EXPECT_GT(sphere_3_observed_ratio, 0.2); + + std::cout << "sphere_1_observed_ratio: " << sphere_1_observed_ratio + << std::endl; + std::cout << "sphere_2_observed_ratio: " << sphere_2_observed_ratio + << std::endl; + std::cout << "sphere_3_observed_ratio: " << sphere_3_observed_ratio + << std::endl; + + // Write to file + io::outputMeshLayerToPly(mesh_layer, "colored_spheres.ply"); +} + +TEST_F(ColorIntegrationTest, OcclusionTesting) { + primitives::Scene scene_3; + + // Scene: Two spheres, one occluded by the other.
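+ // (Geometry: the camera at the origin looks along +x. Sphere 1 spans + // x in [3, 7] with angular radius asin(2/5) ~ 24 deg; sphere 2 spans + // x in [8, 12] with angular radius asin(2/10) ~ 12 deg, entirely inside + // sphere 1's silhouette, so it should be completely occluded.)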
+ scene_3.aabb() = AxisAlignedBoundingBox(Vector3f(-5.0f, -5.0f, -5.0f), + Vector3f(15.0f, 15.0f, 5.0f)); + constexpr float kSphereRadius = 2.0f; + + const Vector3f center_1(5.0f, 0.0f, 0.0f); + const Vector3f center_2(10.0f, 0.0f, 0.0f); + + scene_3.addPrimitive( + std::make_unique<primitives::Sphere>(center_1, kSphereRadius)); + scene_3.addPrimitive( + std::make_unique<primitives::Sphere>(center_2, kSphereRadius)); + scene_3.generateSdfFromScene(truncation_distance_m_, &gt_layer_); + + // Viewpoint + Transform T_S_C = Transform::Identity(); + T_S_C.prerotate( + Eigen::Quaternionf(Eigen::AngleAxisf(M_PI / 2, Vector3f::UnitY()))); + + // Integrate a color image + ProjectiveColorIntegrator color_integrator; + ColorLayer color_layer(voxel_size_m_, MemoryType::kUnified); + + const auto color_1 = Color::Red(); + const auto image_1 = generateSolidColorImage(color_1, height_, width_); + + std::vector<Index3D> updated_blocks; + color_integrator.integrateFrame(image_1, T_S_C, camera_, gt_layer_, + &color_layer, &updated_blocks); + + // Check front sphere (observed voxels red) + const float sphere_1_observed_ratio = + checkSphereColor(color_layer, center_1, kSphereRadius, color_1); + EXPECT_GT(sphere_1_observed_ratio, 0.2); + std::cout << "sphere_1_observed_ratio: " << sphere_1_observed_ratio + << std::endl; + + // Check back sphere (no observed voxels) + const std::vector<Eigen::Vector3f> sphere_points = + getPointsOnASphere(kSphereRadius, center_2); + for (const Vector3f& p : sphere_points) { + const ColorVoxel* color_voxel; + const bool block_allocated = + getVoxelAtPosition(color_layer, p, &color_voxel); + if (block_allocated) { + EXPECT_EQ(color_voxel->weight, 0.0f); + } + } + + // Generate a mesh from the "reconstruction" + MeshIntegrator mesh_integrator; + MeshLayer mesh_layer(block_size_m_, MemoryType::kUnified); + EXPECT_TRUE( + mesh_integrator.integrateMeshFromDistanceField(gt_layer_, &mesh_layer)); + mesh_integrator.colorMesh(color_layer, &mesh_layer); + io::outputMeshLayerToPly(mesh_layer, "colored_spheres_occluded.ply"); +} + +int main(int argc, char** argv) { + FLAGS_alsologtostderr = true; + google::InitGoogleLogging(argv[0]); + google::InstallFailureSignalHandler(); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/nvblox/tests/test_depth_image.cpp b/nvblox/tests/test_depth_image.cpp new file mode 100644 index 00000000..c3d1d317 --- /dev/null +++ b/nvblox/tests/test_depth_image.cpp @@ -0,0 +1,200 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+*/ +#include <gtest/gtest.h> + +#include "nvblox/core/image.h" +#include "nvblox/core/interpolation_2d.h" +#include "nvblox/core/types.h" + +#include "nvblox/tests/gpu_image_routines.h" +#include "nvblox/tests/interpolation_2d_gpu.h" +#include "nvblox/tests/utils.h" + +using namespace nvblox; + +constexpr float kFloatEpsilon = 1e-4; + +class DepthImageTest : public ::testing::Test { + protected: + void SetUp() override { + // Uninitialized depth frame + depth_frame_ = DepthImage(rows_, cols_, MemoryType::kUnified); + } + + Index2D getRandomPixel() { + return Index2D(test_utils::randomIntInRange(0, cols_ - 1), + test_utils::randomIntInRange(0, rows_ - 1)); + } + + int rows_ = 480; + int cols_ = 640; + DepthImage depth_frame_; +}; + +void setImageConstantOnCpu(const float value, DepthImage* depth_frame_ptr) { + // Set every pixel through one access method (2D indexing); the tests read + // the values back through the other (linear indexing). + for (int row_idx = 0; row_idx < depth_frame_ptr->rows(); row_idx++) { + for (int col_idx = 0; col_idx < depth_frame_ptr->cols(); col_idx++) { + (*depth_frame_ptr)(row_idx, col_idx) = value; + } + } +} + +TEST_F(DepthImageTest, Host) { + // Set constant on CPU + setImageConstantOnCpu(1.0, &depth_frame_); + + // Check on the CPU + for (int lin_idx = 0; lin_idx < depth_frame_.numel(); lin_idx++) { + EXPECT_EQ(depth_frame_(lin_idx), 1.0f); + } +} + +TEST_F(DepthImageTest, Device) { + // Set constant on GPU + constexpr float kPixelValue = 1.0f; + test_utils::setImageConstantOnGpu(kPixelValue, &depth_frame_); + + // Check on the CPU + for (int lin_idx = 0; lin_idx < depth_frame_.numel(); lin_idx++) { + EXPECT_EQ(depth_frame_(lin_idx), 1.0f); + } +} + +TEST_F(DepthImageTest, DeviceReduction) { + // Make sure this is deterministic. + std::srand(0); + + // Set constant on CPU + constexpr float kPixelValue = 1.0f; + setImageConstantOnCpu(kPixelValue, &depth_frame_); + + // Change a single value on the CPU + constexpr float kMaxValue = 100.0f; + constexpr float kMinValue = -100.0f; + const Index2D u_max = getRandomPixel(); + Index2D u_min = u_max; + while ((u_min.array() == u_max.array()).all()) { + u_min = getRandomPixel(); + } + depth_frame_(u_max.y(), u_max.x()) = kMaxValue; + depth_frame_(u_min.y(), u_min.x()) = kMinValue; + + // Reduction on the GPU + const float max = image::max(depth_frame_); + const float min = image::min(depth_frame_); + const auto minmax = image::minmax(depth_frame_); + + // Check on the CPU + EXPECT_EQ(max, kMaxValue); + EXPECT_EQ(min, kMinValue); + EXPECT_EQ(minmax.first, kMinValue); + EXPECT_EQ(minmax.second, kMaxValue); +} + +TEST_F(DepthImageTest, LinearInterpolation) { + // The images {depth_frame_col_coords, depth_frame_row_coords} are set up such + // that if you interpolate, you should get the interpolated position back.
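+ // (Pixel-center convention, as inferred from this test: pixel (row, col) is + // centered at u_px = (col + 0.5, row + 0.5), hence the +0.5 offsets below. + // Interpolating the col-coordinate image at u_px should then return exactly + // u_px.x(), and likewise the row-coordinate image returns u_px.y().)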
+ DepthImage depth_frame_col_coords(rows_, cols_, MemoryType::kUnified); + DepthImage depth_frame_row_coords(rows_, cols_, MemoryType::kUnified); + for (int col_idx = 0; col_idx < cols_; col_idx++) { + for (int row_idx = 0; row_idx < rows_; row_idx++) { + depth_frame_col_coords(row_idx, col_idx) = + static_cast(col_idx) + 0.5f; + depth_frame_row_coords(row_idx, col_idx) = + static_cast(row_idx) + 0.5f; + } + } + constexpr int kNumTests = 1000; + // int num_failures = 0; + for (int i = 0; i < kNumTests; i++) { + // Random pixel location on image plane + const Vector2f u_px(test_utils::randomFloatInRange( + 0.5f, static_cast(cols_ - 1) + 0.5f), + test_utils::randomFloatInRange( + 0.5f, static_cast(rows_ - 1) + 0.5f)); + // Interpolate x and y grids + float interpolated_value_col; + EXPECT_TRUE(interpolation::interpolate2DLinear(depth_frame_col_coords, u_px, + &interpolated_value_col)); + float interpolated_value_row; + EXPECT_TRUE(interpolation::interpolate2DLinear(depth_frame_row_coords, u_px, + &interpolated_value_row)); + // Check result + EXPECT_NEAR(interpolated_value_col, u_px.x(), kFloatEpsilon); + EXPECT_NEAR(interpolated_value_row, u_px.y(), kFloatEpsilon); + } +} + +TEST_F(DepthImageTest, DeepCopy) { + // Set constant on CPU + constexpr float kPixelValue = 1.0f; + setImageConstantOnCpu(kPixelValue, &depth_frame_); + + // Copy + DepthImage copy(depth_frame_); + + // Check the copy is actually a copy + for (int lin_idx = 0; lin_idx < copy.numel(); lin_idx++) { + EXPECT_EQ(copy(lin_idx), kPixelValue); + } +} + +TEST_F(DepthImageTest, InterpolationGPU) { + // Tiny images + DepthImage image_x(2, 2, MemoryType::kUnified); + image_x(0, 0) = 0.5f; + image_x(0, 1) = 1.5f; + image_x(1, 0) = 0.5f; + image_x(1, 1) = 1.5f; + DepthImage image_y(2, 2, MemoryType::kUnified); + image_y(0, 0) = 0.5f; + image_y(0, 1) = 0.5f; + image_y(1, 0) = 1.5f; + image_y(1, 1) = 1.5f; + + constexpr int kNumTests = 1000; + std::vector u_px_vec; + u_px_vec.reserve(kNumTests); + for (int i = 0; i < kNumTests; i++) { + u_px_vec.push_back( + Eigen::Vector2f(test_utils::randomFloatInRange(0.5f, 1.5f), + test_utils::randomFloatInRange(0.5f, 1.5f))); + } + + std::vector values_x(kNumTests, 1.0f); + std::vector success_flags_x(kNumTests, 0); + test_utils::linearInterpolateImageGpu(image_x, u_px_vec, &values_x, + &success_flags_x); + std::vector values_y(kNumTests, 1.0f); + std::vector success_flags_y(kNumTests, 0); + test_utils::linearInterpolateImageGpu(image_y, u_px_vec, &values_y, + &success_flags_y); + + for (int i = 0; i < kNumTests; i++) { + EXPECT_TRUE(success_flags_x[i] == 1); + EXPECT_NEAR(values_x[i], u_px_vec[i].x(), kFloatEpsilon); + EXPECT_NEAR(values_y[i], u_px_vec[i].y(), kFloatEpsilon); + } +} + +int main(int argc, char** argv) { + FLAGS_alsologtostderr = true; + google::InitGoogleLogging(argv[0]); + google::InstallFailureSignalHandler(); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/nvblox/tests/test_esdf_integrator.cpp b/nvblox/tests/test_esdf_integrator.cpp new file mode 100644 index 00000000..f8b27d48 --- /dev/null +++ b/nvblox/tests/test_esdf_integrator.cpp @@ -0,0 +1,883 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include +#include + +#include "nvblox/core/cuda/warmup.h" +#include "nvblox/core/indexing.h" +#include "nvblox/core/layer.h" +#include "nvblox/core/types.h" +#include "nvblox/core/voxels.h" +#include "nvblox/integrators/esdf_integrator.h" +#include "nvblox/integrators/projective_tsdf_integrator.h" +#include "nvblox/io/ply_writer.h" +#include "nvblox/io/pointcloud_io.h" +#include "nvblox/primitives/scene.h" + +#include "nvblox/tests/utils.h" + +using namespace nvblox; + +constexpr float kFloatEpsilon = 1e-4; + +// Have various obstacles that the ESDF can try to navigate around. +enum class Obstacle { + kAxisAlignedPlane, // 0 + kAngledPlane, // 1 + kSphereOrigin, // 2 + kBox, // 3 + kBoxWithSphere, // 4 + kBoxWithCube // 5 +}; + +class EsdfIntegratorTest : public ::testing::TestWithParam { + protected: + void SetUp() override; + + void addParameterizedObstacleToScene(const Obstacle& obstacle); + + bool outputFlatSliceEsdfAsPly(const EsdfLayer& layer, + const std::string& ply_path, float height); + bool outputFlatSliceTsdfAsPly(const TsdfLayer& layer, + const std::string& ply_path, float height); + + // Returns PERCENTAGE ABOVE THRESHOLD + float compareEsdfToGt(const EsdfLayer& esdf_layer, const TsdfLayer& gt_layer, + float error_threshold); + + // Returns PERCENTAGE ABOVE THRESHOLD + float compareEsdfToEsdf(const EsdfLayer& esdf_layer, + const EsdfLayer& gt_layer, float error_threshold); + + // Returns if all the voxels in the ESDF are valid. + bool validateEsdf(const EsdfLayer& esdf_layer, + float max_squared_distance_vox); + + bool output_pointclouds_ = false; + float block_size_; + float voxel_size_ = 0.10f; + float max_distance_ = 4.0f; + float very_small_cutoff_ = 2e-3f; + float small_cutoff_ = 2e-2f; + + TsdfLayer::Ptr tsdf_layer_; + TsdfLayer::Ptr gt_sdf_layer_; + EsdfLayer::Ptr esdf_layer_; + + EsdfIntegrator esdf_integrator_; + + // A simulation scene. + primitives::Scene scene_; + + // Camera and TSDF parameters for incremental integration. 
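+ // (These members are only used by the incremental tests below, which render + // depth images and integrate them frame-by-frame; the batch tests generate + // their TSDF directly from the primitive scene instead.)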
+ std::shared_ptr camera_; + ProjectiveTsdfIntegrator tsdf_integrator_; +}; + +void EsdfIntegratorTest::SetUp() { + timing::Timing::Reset(); + std::srand(0); + block_size_ = VoxelBlock::kVoxelsPerSide * voxel_size_; + + tsdf_layer_.reset(new TsdfLayer(voxel_size_, MemoryType::kUnified)); + gt_sdf_layer_.reset(new TsdfLayer(voxel_size_, MemoryType::kUnified)); + esdf_layer_.reset(new EsdfLayer(voxel_size_, MemoryType::kUnified)); + + esdf_integrator_.max_distance_m() = max_distance_; + esdf_integrator_.min_weight() = 1.0f; + + camera_.reset(new Camera(300, 300, 320, 240, 640, 480)); +} + +void EsdfIntegratorTest::addParameterizedObstacleToScene( + const Obstacle& obstacle) { + if (obstacle == Obstacle::kBox || obstacle == Obstacle::kBoxWithCube || + obstacle == Obstacle::kBoxWithSphere) { + scene_.aabb() = AxisAlignedBoundingBox(Vector3f(-5.5f, -5.5f, -0.5f), + Vector3f(5.5f, 5.5f, 5.5f)); + scene_.addPlaneBoundaries(-5.0f, 5.0f, -5.0f, 5.0f); + scene_.addGroundLevel(0.0f); + scene_.addCeiling(5.0f); + } else { + scene_.aabb() = AxisAlignedBoundingBox(Vector3f(-3.0f, -3.0f, 0.0f), + Vector3f(3.0f, 3.0f, 3.0f)); + } + + switch (obstacle) { + case Obstacle::kAxisAlignedPlane: + // Plane at the origin pointing in the -x direction. + scene_.addPrimitive(std::make_unique( + Vector3f(0.05, 0.05, 0.05), Vector3f(-1, 0, 0).normalized())); + break; + case Obstacle::kAngledPlane: + // Plane at the origin pointing in the -x -y direction. + scene_.addPrimitive(std::make_unique( + Vector3f(0.0, 0.0, 0.0), Vector3f(1, 1, 0).normalized())); + break; + case Obstacle::kSphereOrigin: + // Create a scene that's just a sphere + scene_.addPrimitive( + std::make_unique(Vector3f(0.0, 0.0, 0.0), 2.0)); + break; + case Obstacle::kBoxWithSphere: + scene_.addPrimitive(std::make_unique( + Vector3f(0.0f, 0.0f, 2.0f), 2.0f)); + break; + case Obstacle::kBoxWithCube: + scene_.addPrimitive(std::make_unique( + Vector3f(0.0f, 0.0f, 2.0f), Vector3f(2.0f, 2.0f, 2.0f))); + break; + default: + break; + }; +} + +bool EsdfIntegratorTest::outputFlatSliceEsdfAsPly(const EsdfLayer& layer, + const std::string& ply_path, + float height) { + // Create a ply writer object. + io::PlyWriter writer(ply_path); + + // Combine all the voxels in the mesh into a pointcloud. + std::vector points; + std::vector distances; + + const float block_size = layer.block_size(); + const float voxel_size = layer.voxel_size(); + + auto lambda = [&points, &distances, &block_size, &voxel_size, &height]( + const Index3D& block_index, const Index3D& voxel_index, + const EsdfVoxel* voxel) { + if (voxel->observed) { + Vector3f position = getCenterPostionFromBlockIndexAndVoxelIndex( + block_size, block_index, voxel_index); + if (position.z() - height < voxel_size && position.z() >= height) { + points.push_back(position); + float distance = voxel_size * std::sqrt(voxel->squared_distance_vox); + if (voxel->is_inside) { + distance = -distance; + } + distances.push_back(distance); + } + } + }; + + // Call above lambda on every voxel in the layer. + callFunctionOnAllVoxels(layer, lambda); + + // Add the pointcloud to the ply writer. + writer.setPoints(&points); + writer.setIntensities(&distances); + + // Write out the ply. + return writer.write(); +} + +bool EsdfIntegratorTest::outputFlatSliceTsdfAsPly(const TsdfLayer& layer, + const std::string& ply_path, + float height) { + // Create a ply writer object. + io::PlyWriter writer(ply_path); + + // Combine all the voxels in the mesh into a pointcloud. 
+ std::vector points; + std::vector distances; + + const float block_size = layer.block_size(); + const float voxel_size = layer.voxel_size(); + + auto lambda = [&points, &distances, &block_size, &voxel_size, &height]( + const Index3D& block_index, const Index3D& voxel_index, + const TsdfVoxel* voxel) { + if (voxel->weight > 1e-4f) { + Vector3f position = getCenterPostionFromBlockIndexAndVoxelIndex( + block_size, block_index, voxel_index); + if (position.z() - height < voxel_size && position.z() >= height) { + points.push_back(position); + distances.push_back(voxel->distance); + } + } + }; + + // Call above lambda on every voxel in the layer. + callFunctionOnAllVoxels(layer, lambda); + + // Add the pointcloud to the ply writer. + writer.setPoints(&points); + writer.setIntensities(&distances); + + // Write out the ply. + return writer.write(); +} + +float EsdfIntegratorTest::compareEsdfToGt(const EsdfLayer& esdf_layer, + const TsdfLayer& gt_layer, + float error_threshold) { + // Compare the layers + int total_num_voxels_observed = 0; + int num_voxels_over_threshold = 0; + for (const Index3D& block_index : esdf_layer.getAllBlockIndices()) { + const auto block_esdf = esdf_layer.getBlockAtIndex(block_index); + const auto block_gt = gt_layer.getBlockAtIndex(block_index); + if (!block_esdf || !block_gt) { + continue; + } + for (int x = 0; x < VoxelBlock::kVoxelsPerSide; x++) { + for (int y = 0; y < VoxelBlock::kVoxelsPerSide; y++) { + for (int z = 0; z < VoxelBlock::kVoxelsPerSide; z++) { + float distance = + esdf_layer.voxel_size() * + std::sqrt(block_esdf->voxels[x][y][z].squared_distance_vox); + if (block_esdf->voxels[x][y][z].is_inside) { + distance = -distance; + } + float diff = distance - block_gt->voxels[x][y][z].distance; + + if (block_esdf->voxels[x][y][z].observed) { + ++total_num_voxels_observed; + if (std::abs(diff) > error_threshold) { + ++num_voxels_over_threshold; + } + } + } + } + } + } + LOG(INFO) << "Num voxels observed: " << total_num_voxels_observed + << " over threshold: " << num_voxels_over_threshold; + + return static_cast(num_voxels_over_threshold) / + total_num_voxels_observed; +} + +float EsdfIntegratorTest::compareEsdfToEsdf(const EsdfLayer& esdf_layer, + const EsdfLayer& gt_layer, + float error_threshold) { + // Compare the layers + int total_num_voxels_observed = 0; + int num_voxels_over_threshold = 0; + for (const Index3D& block_index : esdf_layer.getAllBlockIndices()) { + const auto block_esdf = esdf_layer.getBlockAtIndex(block_index); + CHECK_NOTNULL(block_esdf.get()); + const auto block_gt = gt_layer.getBlockAtIndex(block_index); + if (!block_esdf || !block_gt) { + continue; + } + for (int x = 0; x < VoxelBlock::kVoxelsPerSide; x++) { + for (int y = 0; y < VoxelBlock::kVoxelsPerSide; y++) { + for (int z = 0; z < VoxelBlock::kVoxelsPerSide; z++) { + float distance = + esdf_layer.voxel_size() * + std::sqrt(block_esdf->voxels[x][y][z].squared_distance_vox); + if (block_esdf->voxels[x][y][z].is_inside) { + distance = -distance; + } + float distance_gt = + esdf_layer.voxel_size() * + std::sqrt(block_gt->voxels[x][y][z].squared_distance_vox); + if (block_gt->voxels[x][y][z].is_inside) { + distance_gt = -distance_gt; + } + float diff = distance - distance_gt; + + if (block_esdf->voxels[x][y][z].observed) { + ++total_num_voxels_observed; + if (std::abs(diff) > error_threshold) { + ++num_voxels_over_threshold; + } + } + } + } + } + } + LOG(INFO) << "Num ESDF voxels observed: " << total_num_voxels_observed + << " over threshold: " << num_voxels_over_threshold; + + return 
static_cast<float>(num_voxels_over_threshold) / + total_num_voxels_observed; +} + +bool EsdfIntegratorTest::validateEsdf(const EsdfLayer& esdf_layer, + float max_squared_distance_vox) { + constexpr float kTolerance = 1e-4; + constexpr int kVoxelsPerSide = VoxelBlock<EsdfVoxel>::kVoxelsPerSide; + for (const Index3D& block_index : esdf_layer.getAllBlockIndices()) { + const auto block = esdf_layer.getBlockAtIndex(block_index); + if (!block) { + continue; + } + for (int x = 0; x < kVoxelsPerSide; x++) { + for (int y = 0; y < kVoxelsPerSide; y++) { + for (int z = 0; z < kVoxelsPerSide; z++) { + const EsdfVoxel& voxel = block->voxels[x][y][z]; + + if (!voxel.observed) { + continue; + } + // If the voxel is a site, its parent must be 0, 0, 0 and vice versa. + if (voxel.parent_direction == Index3D::Zero()) { + if (!voxel.is_site && + voxel.squared_distance_vox < max_squared_distance_vox && + voxel.squared_distance_vox >= kTolerance) { + LOG(ERROR) << "Wrong distance for zero parent voxel! " + << voxel.parent_direction.transpose() << " " + << voxel.squared_distance_vox << "/" + << max_squared_distance_vox; + return false; + } + } else { + // First check that the distance matches. + if (voxel.squared_distance_vox < max_squared_distance_vox && + voxel.squared_distance_vox - + voxel.parent_direction.squaredNorm() >= + kTolerance) { + LOG(ERROR) << "Distance doesn't match parent direction."; + return false; + } + + // If a voxel has a parent direction, it should definitely point to + // a site voxel. + + // Get the parent. + Index3D parent_index = Index3D(x, y, z) + voxel.parent_direction; + + // Check if the voxel is within the same block. + if (parent_index.x() < 0 || parent_index.x() >= kVoxelsPerSide || + parent_index.y() < 0 || parent_index.y() >= kVoxelsPerSide || + parent_index.z() < 0 || parent_index.z() >= kVoxelsPerSide) { + // Then we need to get the block index. + Index3D neighbor_block_index = block_index; + + // Find the parent index. + while (parent_index.x() >= kVoxelsPerSide) { + parent_index.x() -= kVoxelsPerSide; + neighbor_block_index.x()++; + } + while (parent_index.y() >= kVoxelsPerSide) { + parent_index.y() -= kVoxelsPerSide; + neighbor_block_index.y()++; + } + while (parent_index.z() >= kVoxelsPerSide) { + parent_index.z() -= kVoxelsPerSide; + neighbor_block_index.z()++; + } + + while (parent_index.x() < 0) { + parent_index.x() += kVoxelsPerSide; + neighbor_block_index.x()--; + } + while (parent_index.y() < 0) { + parent_index.y() += kVoxelsPerSide; + neighbor_block_index.y()--; + } + while (parent_index.z() < 0) { + parent_index.z() += kVoxelsPerSide; + neighbor_block_index.z()--; + } + + EsdfBlock::ConstPtr neighbor_block = + esdf_layer.getBlockAtIndex(neighbor_block_index); + if (!neighbor_block) { + LOG(ERROR) << "Neighbor block does not exist!"; + return false; + } + const EsdfVoxel* neighbor_voxel = + &neighbor_block->voxels[parent_index.x()][parent_index.y()] + [parent_index.z()]; + if (!neighbor_voxel->is_site) { + LOG(ERROR) << "Parent in different block but is not a site!"; + LOG(ERROR) << "Block index: " + << neighbor_block_index.transpose() + << " voxel index: " << parent_index.transpose(); + return false; + } + } else { + // Ok, check if the parent index is a site.
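+ // (Invariant being checked: following parent_direction from any observed + // non-site voxel must land on a voxel with is_site == true, whether that + // parent lives in a neighboring block, handled just above, or in the same + // block, handled here.)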
+ if (!block + ->voxels[parent_index.x()][parent_index.y()] + [parent_index.z()] + .is_site) { + LOG(ERROR) << "Parent within the same block but is not a site!"; + return false; + } + } + } + if (voxel.is_site) { + if (voxel.parent_direction != Index3D::Zero()) { + LOG(ERROR) << "Site voxel has a parent!"; + return false; + } + if (voxel.squared_distance_vox >= kTolerance) { + LOG(ERROR) << "Site voxel has non-zero distance! " + << voxel.squared_distance_vox; + return false; + } + } + } + } + } + } + return true; +} + +TEST_P(EsdfIntegratorTest, SingleEsdfTestCPU) { + // Create a scene that's just a plane. + addParameterizedObstacleToScene(GetParam()); + + // Generate a TSDF + scene_.generateSdfFromScene(4 * voxel_size_, tsdf_layer_.get()); + // Get the full ground truth ESDF + scene_.generateSdfFromScene(max_distance_, gt_sdf_layer_.get()); + + // Actually run the ESDF generation. + std::vector block_indices = tsdf_layer_->getAllBlockIndices(); + esdf_integrator_.integrateBlocksOnCPU(*tsdf_layer_, block_indices, + esdf_layer_.get()); + + // Compare the results to GT. + EXPECT_LE(compareEsdfToGt(*esdf_layer_, *gt_sdf_layer_, voxel_size_), + very_small_cutoff_); + + // Check if all parents point the correct directions, etc. + EXPECT_TRUE(validateEsdf( + *esdf_layer_, esdf_integrator_.max_squared_distance_vox(voxel_size_))); + + float slice_height = 1.0f; + std::string obstacle_string = std::to_string(static_cast(GetParam())); + outputFlatSliceEsdfAsPly( + *esdf_layer_, "test_esdf_cpu_" + obstacle_string + "_esdf_slice.ply", + 1.0f); + outputFlatSliceTsdfAsPly( + *tsdf_layer_, "test_esdf_cpu_" + obstacle_string + "_tsdf_slice.ply", + 1.0f); + + if (output_pointclouds_) { + io::outputVoxelLayerToPly(*esdf_layer_, + "test_esdf_cpu_" + obstacle_string + "_esdf.ply"); + io::outputVoxelLayerToPly(*gt_sdf_layer_, + "test_esdf_cpu_" + obstacle_string + "_gt.ply"); + io::outputVoxelLayerToPly(*tsdf_layer_, + "test_esdf_cpu_" + obstacle_string + "_tsdf.ply"); + } + std::cout << timing::Timing::Print(); +} + +TEST_P(EsdfIntegratorTest, SingleEsdfTestGPU) { + // Create a scene that's just an object. + addParameterizedObstacleToScene(GetParam()); + + // Generate a TSDF + scene_.generateSdfFromScene(4 * voxel_size_, tsdf_layer_.get()); + // Get the full ground truth ESDF + scene_.generateSdfFromScene(max_distance_, gt_sdf_layer_.get()); + + // Actually run the ESDF generation. + std::vector block_indices = tsdf_layer_->getAllBlockIndices(); + esdf_integrator_.integrateBlocksOnGPU(*tsdf_layer_, block_indices, + esdf_layer_.get()); + + // Compare the results to GT. + EXPECT_LE(compareEsdfToGt(*esdf_layer_, *gt_sdf_layer_, voxel_size_), + very_small_cutoff_); + + // Check if all parents point the correct directions, etc. + EXPECT_TRUE(validateEsdf( + *esdf_layer_, esdf_integrator_.max_squared_distance_vox(voxel_size_))); + + if (output_pointclouds_) { + std::string obstacle_string = std::to_string(static_cast(GetParam())); + io::outputVoxelLayerToPly(*esdf_layer_, + "test_esdf_gpu_" + obstacle_string + "_esdf.ply"); + io::outputVoxelLayerToPly(*gt_sdf_layer_, + "test_esdf_gpu_" + obstacle_string + "_gt.ply"); + io::outputVoxelLayerToPly(*tsdf_layer_, + "test_esdf_gpu_" + obstacle_string + "_tsdf.ply"); + } + std::cout << timing::Timing::Print(); +} + +TEST_P(EsdfIntegratorTest, ComplexSceneWithTsdf) { + constexpr float kTrajectoryRadius = 4.0f; + constexpr float kTrajectoryHeight = 2.0f; + constexpr int kNumTrajectoryPoints = 80; + + // Maximum distance to consider for scene generation. 
+ constexpr float kMaxDist = 15.0; + + Obstacle obstacle = GetParam(); + // Skip the non-box cases: + if (obstacle != Obstacle::kBoxWithSphere && + obstacle != Obstacle::kBoxWithCube && obstacle != Obstacle::kBox) { + return; + } + addParameterizedObstacleToScene(obstacle); + + // Get the ground truth SDF for it. + scene_.generateSdfFromScene(max_distance_, gt_sdf_layer_.get()); + + // Set up the integrator. + tsdf_integrator_.max_integration_distance_m(kMaxDist); + + // Simulate a trajectory of the requisite amount of points, on the circle + // around the sphere. + const float radians_increment = 2 * M_PI / (kNumTrajectoryPoints); + + // Create a depth frame. We share this memory buffer for the entire + // trajectory. + DepthImage depth_image(camera_->height(), camera_->width(), + MemoryType::kUnified); + + for (size_t i = 0; i < kNumTrajectoryPoints; i++) { + const float theta = radians_increment * i; + // Convert polar to cartesian coordinates. + Vector3f cartesian_coordinates(kTrajectoryRadius * std::cos(theta), + kTrajectoryRadius * std::sin(theta), + kTrajectoryHeight); + // The camera has its z axis pointing towards the origin. + Eigen::Quaternionf rotation_base(0.5, 0.5, 0.5, 0.5); + Eigen::Quaternionf rotation_theta( + Eigen::AngleAxisf(M_PI + theta, Vector3f::UnitZ())); + + // Construct a transform from camera to scene with this. + Transform T_S_C = Transform::Identity(); + T_S_C.prerotate(rotation_theta * rotation_base); + T_S_C.pretranslate(cartesian_coordinates); + + // Generate a depth image of the scene. + scene_.generateDepthImageFromScene(*camera_, T_S_C, kMaxDist, &depth_image); + + // Integrate this depth image. + tsdf_integrator_.integrateFrame(depth_image, T_S_C, *camera_, + tsdf_layer_.get()); + } + + // Actually run the ESDF generation. + esdf_integrator_.integrateLayer(*tsdf_layer_, esdf_layer_.get()); + + // Compare the results to GT. + // Allow within 1 voxel sizes. + EXPECT_LE(compareEsdfToGt(*esdf_layer_, *gt_sdf_layer_, 4 * voxel_size_), + 0.20); + + EXPECT_TRUE(validateEsdf( + *esdf_layer_, esdf_integrator_.max_squared_distance_vox(voxel_size_))); + + if (output_pointclouds_) { + // Output the layer for inspection. + outputFlatSliceEsdfAsPly(*esdf_layer_, "test_esdf_slice_complex.ply", 1.0f); + outputFlatSliceTsdfAsPly(*gt_sdf_layer_, "test_esdf_slice_complex_gt.ply", + 1.0f); + io::outputVoxelLayerToPly(*gt_sdf_layer_, + "test_esdf_slice_complex_gt_full.ply"); + } + + std::cout << timing::Timing::Print(); +} + +TEST_P(EsdfIntegratorTest, IncrementalTsdfAndEsdfWithObjectRemovalGPU) { + constexpr float kTrajectoryRadius = 4.0f; + constexpr float kTrajectoryHeight = 2.0f; + constexpr int kNumTrajectoryPoints = 1; + + // Maximum distance to consider for scene generation. + constexpr float kMaxDist = 15.0; + + // Create a batch layer to batch to. + EsdfLayer esdf_layer_batch(voxel_size_, MemoryType::kUnified); + + Obstacle obstacle = GetParam(); + // Skip the non-box cases: + if (obstacle != Obstacle::kBoxWithSphere && + obstacle != Obstacle::kBoxWithCube && obstacle != Obstacle::kBox) { + return; + } + addParameterizedObstacleToScene(obstacle); + + // Set up the integrator. + tsdf_integrator_.max_integration_distance_m(kMaxDist); + + // Simulate a trajectory of the requisite amount of points, on the circle + // around the sphere. + const float radians_increment = 2 * M_PI / (kNumTrajectoryPoints); + + // Create a depth frame. We share this memory buffer for the entire + // trajectory. 
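+ // (Reusing one unified-memory image means the loop below does no per-frame + // allocation; the assumption here is that generateDepthImageFromScene() + // overwrites the same buffer in place each iteration, which matches how the + // buffer is passed by pointer.)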
+ DepthImage depth_image(camera_->height(), camera_->width(), + MemoryType::kUnified); + + for (size_t i = 0; i < kNumTrajectoryPoints * 2; i++) { + if (i == kNumTrajectoryPoints) { + // Clear the scene. + scene_.clear(); + addParameterizedObstacleToScene(Obstacle::kBox); + } + const float theta = radians_increment * i; + // Convert polar to cartesian coordinates. + Vector3f cartesian_coordinates(kTrajectoryRadius * std::cos(theta), + kTrajectoryRadius * std::sin(theta), + kTrajectoryHeight); + // The camera has its z axis pointing towards the origin. + Eigen::Quaternionf rotation_base(0.5, 0.5, 0.5, 0.5); + Eigen::Quaternionf rotation_theta( + Eigen::AngleAxisf(M_PI + theta, Vector3f::UnitZ())); + + // Construct a transform from camera to scene with this. + Transform T_S_C = Transform::Identity(); + T_S_C.prerotate(rotation_theta * rotation_base); + T_S_C.pretranslate(cartesian_coordinates); + + // Generate a depth image of the scene. + scene_.generateDepthImageFromScene(*camera_, T_S_C, kMaxDist, &depth_image); + + // Integrate this depth image. + std::vector updated_blocks; + tsdf_integrator_.integrateFrame(depth_image, T_S_C, *camera_, + tsdf_layer_.get(), &updated_blocks); + + // Run incremental ESDF generation. + esdf_integrator_.integrateBlocksOnGPU(*tsdf_layer_, updated_blocks, + esdf_layer_.get()); + } + + // Run batch ESDF generation to compare to. + esdf_integrator_.integrateLayer(*tsdf_layer_, &esdf_layer_batch); + + // Compare results to each other. Should really be basically identical. + EXPECT_LE(compareEsdfToEsdf(*esdf_layer_, esdf_layer_batch, voxel_size_), + small_cutoff_); + + EXPECT_TRUE(validateEsdf( + *esdf_layer_, esdf_integrator_.max_squared_distance_vox(voxel_size_))); + + if (output_pointclouds_) { + float slice_height = 3.75f; + std::string obstacle_string = std::to_string(static_cast(obstacle)); + outputFlatSliceEsdfAsPly( + *esdf_layer_, "test_incremental_" + obstacle_string + "_esdf.ply", + slice_height); + outputFlatSliceEsdfAsPly( + esdf_layer_batch, + "test_incremental_" + obstacle_string + "_esdf_batch.ply", + slice_height); + io::outputVoxelLayerToPly( + *esdf_layer_, "test_incremental_" + obstacle_string + "_esdf_full.ply"); + } + std::cout << timing::Timing::Print(); +} + +TEST_P(EsdfIntegratorTest, IncrementalEsdf2DWithObjectRemoval) { + // Create a batch layer to batch to. + EsdfLayer esdf_layer_batch(voxel_size_, MemoryType::kUnified); + + Obstacle obstacle = GetParam(); + // Skip the non-box cases: + if (obstacle != Obstacle::kBoxWithSphere && + obstacle != Obstacle::kBoxWithCube && obstacle != Obstacle::kBox) { + return; + } + addParameterizedObstacleToScene(obstacle); + + AxisAlignedBoundingBox aabb(Vector3f(-5.0f, -5.0f, 1.0f - voxel_size_ / 2.0f), + Vector3f(5.0f, 5.0f, 1.0f + voxel_size_ / 2.0f)); + scene_.aabb() = aabb; + + // Get the ground truth SDF for it. + scene_.generateSdfFromScene(max_distance_, tsdf_layer_.get()); + + for (size_t i = 0; i < 2; i++) { + if (i == 1) { + tsdf_layer_->clear(); + // Clear the scene. + scene_.clear(); + addParameterizedObstacleToScene(Obstacle::kBox); + scene_.aabb() = aabb; + scene_.generateSdfFromScene(max_distance_, tsdf_layer_.get()); + } + + // Get all blocks from the tsdf layer. + std::vector updated_blocks; + updated_blocks = tsdf_layer_->getAllBlockIndices(); + + // Run incremental ESDF generation. + esdf_integrator_.integrateBlocksOnGPU(*tsdf_layer_, updated_blocks, + esdf_layer_.get()); + } + + // Run batch ESDF generation to compare to. 
+ esdf_integrator_.integrateLayer(*tsdf_layer_, &esdf_layer_batch); + + // Compare results to each other. Should really be basically identical. + EXPECT_LE(compareEsdfToEsdf(*esdf_layer_, esdf_layer_batch, voxel_size_), + small_cutoff_); + + EXPECT_TRUE(validateEsdf( + *esdf_layer_, esdf_integrator_.max_squared_distance_vox(voxel_size_))); + if (output_pointclouds_) { + float slice_height = 1.0f; + std::string obstacle_string = std::to_string(static_cast(obstacle)); + outputFlatSliceEsdfAsPly( + *esdf_layer_, "test_2d_" + obstacle_string + "_esdf.ply", slice_height); + outputFlatSliceEsdfAsPly(esdf_layer_batch, + "test_2d_" + obstacle_string + "_esdf_batch.ply", + slice_height); + outputFlatSliceTsdfAsPly(*tsdf_layer_, + "test_2d_" + obstacle_string + "_tsdf_batch.ply", + slice_height); + } + std::cout << timing::Timing::Print(); +} + +TEST_P(EsdfIntegratorTest, IncrementalEsdfSliceWithObjectRemovalGPU) { + // Create a batch layer to batch to. + EsdfLayer esdf_layer_batch(voxel_size_, MemoryType::kUnified); + + Obstacle obstacle = GetParam(); + // Skip the non-box cases: + if (obstacle != Obstacle::kBoxWithSphere && + obstacle != Obstacle::kBoxWithCube && obstacle != Obstacle::kBox) { + return; + } + + float min_z = 1.0f; + float max_z = 2.0f; + float output_z = 1.5f; + AxisAlignedBoundingBox aabb( + Vector3f(-5.0f, -5.0f, output_z - voxel_size_ / 2.0f), + Vector3f(5.0f, 5.0f, output_z + voxel_size_ / 2.0f)); + float tsdf_truncation_distance = 4 * voxel_size_; + + addParameterizedObstacleToScene(obstacle); + + scene_.aabb() = aabb; + + // Get the ground truth SDF for it. + scene_.generateSdfFromScene(tsdf_truncation_distance, tsdf_layer_.get()); + + for (size_t i = 0; i < 2; i++) { + if (i == 1) { + tsdf_layer_->clear(); + // Clear the scene. + scene_.clear(); + addParameterizedObstacleToScene(Obstacle::kBox); + scene_.aabb() = aabb; + scene_.generateSdfFromScene(tsdf_truncation_distance, tsdf_layer_.get()); + } + + // Get all blocks from the tsdf layer. + std::vector updated_blocks; + updated_blocks = tsdf_layer_->getAllBlockIndices(); + + // Run incremental ESDF generation. + esdf_integrator_.integrateSliceOnGPU(*tsdf_layer_, updated_blocks, min_z, + max_z, output_z, esdf_layer_.get()); + } + + // Run batch ESDF generation to compare to. + esdf_integrator_.integrateLayer(*tsdf_layer_, &esdf_layer_batch); + + // Compare results to each other. Should really be basically identical. + EXPECT_LE(compareEsdfToEsdf(*esdf_layer_, esdf_layer_batch, voxel_size_), + small_cutoff_); + + EXPECT_TRUE(validateEsdf( + *esdf_layer_, esdf_integrator_.max_squared_distance_vox(voxel_size_))); + if (output_pointclouds_) { + float slice_height = output_z; + std::string obstacle_string = std::to_string(static_cast(obstacle)); + io::outputVoxelLayerToPly( + *esdf_layer_, "test_slice_" + obstacle_string + "_esdf_full.ply"); + outputFlatSliceEsdfAsPly( + esdf_layer_batch, "test_slice_" + obstacle_string + "_esdf_batch.ply", + slice_height); + io::outputVoxelLayerToPly( + *tsdf_layer_, "test_slice_" + obstacle_string + "_tsdf_batch.ply"); + } + std::cout << timing::Timing::Print(); +} + +TEST_P(EsdfIntegratorTest, IncrementalEsdfWithObjectRemoval) { + // Create a batch layer to batch to. 
+ EsdfLayer esdf_layer_batch(voxel_size_, MemoryType::kUnified); + + Obstacle obstacle = GetParam(); + // Skip the non-box cases: + if (obstacle != Obstacle::kBoxWithSphere && + obstacle != Obstacle::kBoxWithCube && obstacle != Obstacle::kBox) { + return; + } + addParameterizedObstacleToScene(obstacle); + + // Get the ground truth SDF for it. + scene_.generateSdfFromScene(max_distance_, tsdf_layer_.get()); + + for (size_t i = 0; i < 2; i++) { + if (i == 1) { + // Clear the scene. + scene_.clear(); + addParameterizedObstacleToScene(Obstacle::kBox); + + scene_.generateSdfFromScene(max_distance_, tsdf_layer_.get()); + } + + // Get all blocks from the tsdf layer. + std::vector updated_blocks; + updated_blocks = tsdf_layer_->getAllBlockIndices(); + + // Run incremental ESDF generation. + esdf_integrator_.integrateBlocksOnGPU(*tsdf_layer_, updated_blocks, + esdf_layer_.get()); + } + + // Run batch ESDF generation to compare to. + esdf_integrator_.integrateLayer(*tsdf_layer_, &esdf_layer_batch); + + // Compare results to each other. Should really be basically identical. + EXPECT_LE(compareEsdfToEsdf(*esdf_layer_, esdf_layer_batch, voxel_size_), + small_cutoff_); + + EXPECT_TRUE(validateEsdf( + *esdf_layer_, esdf_integrator_.max_squared_distance_vox(voxel_size_))); + + if (output_pointclouds_) { + float slice_height = 3.5f; + std::string obstacle_string = std::to_string(static_cast(obstacle)); + outputFlatSliceEsdfAsPly(*esdf_layer_, + "test_object_" + obstacle_string + "_esdf.ply", + slice_height); + outputFlatSliceEsdfAsPly( + esdf_layer_batch, "test_object_" + obstacle_string + "_esdf_batch.ply", + slice_height); + io::outputVoxelLayerToPly( + *esdf_layer_, "test_object_" + obstacle_string + "_esdf_full.ply"); + } + std::cout << timing::Timing::Print(); +} + +INSTANTIATE_TEST_CASE_P( + ParameterizedEsdfTests, EsdfIntegratorTest, + ::testing::Values(Obstacle::kAxisAlignedPlane, Obstacle::kAngledPlane, + Obstacle::kSphereOrigin, Obstacle::kBox, + Obstacle::kBoxWithSphere, Obstacle::kBoxWithCube)); + +int main(int argc, char** argv) { + warmupCuda(); + google::InitGoogleLogging(argv[0]); + FLAGS_alsologtostderr = true; + google::InstallFailureSignalHandler(); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/nvblox/tests/test_for_memory_leaks.cpp b/nvblox/tests/test_for_memory_leaks.cpp new file mode 100644 index 00000000..0252e80f --- /dev/null +++ b/nvblox/tests/test_for_memory_leaks.cpp @@ -0,0 +1,159 @@ +/* +Copyright 2022 NVIDIA CORPORATION + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#include <gtest/gtest.h> + +#include "nvblox/core/camera.h" +#include "nvblox/core/common_names.h" +#include "nvblox/core/image.h" +#include "nvblox/datasets/image_loader.h" +#include "nvblox/datasets/parse_3dmatch.h" +#include "nvblox/integrators/projective_tsdf_integrator.h" +#include "nvblox/mesh/mesh_integrator.h" + +#include "nvblox/tests/test_utils_cuda.h" + +using namespace nvblox; + +std::pair<int, float> getFreeGPUMemory() { + size_t free_bytes; + size_t total_bytes; + cudaMemGetInfo(&free_bytes, &total_bytes); + const int free_mb = static_cast<int>(free_bytes / 1e6); + const int total_mb = static_cast<int>(total_bytes / 1e6); + const float free_percent = + static_cast<float>(free_mb) * 100.0f / static_cast<float>(total_mb); + return {free_mb, free_percent}; +} + +TEST(MemoryLeakTest, UnifiedVectorInt) { + // Allocate a bunch of data + unified_vector<int> vec; + constexpr int kNumTestingRounds = 10; + constexpr int kMegaBytesPerRound = 100; + constexpr int kNumVariablesToAddPerRound = + static_cast<int>(kMegaBytesPerRound * 1e6 / sizeof(int)); + int start_free_gpu_memory_mb; + std::tie(start_free_gpu_memory_mb, std::ignore) = getFreeGPUMemory(); + for (int test_idx = 0; test_idx < kNumTestingRounds; test_idx++) { + for (int i = 0; i < kNumVariablesToAddPerRound; i++) { + vec.push_back(i); + } + test_utils::addOneToAllGPU(&vec); + // Check (only on the final round) + if (test_idx == (kNumTestingRounds - 1)) { + for (int i = 0; i < kNumVariablesToAddPerRound; i++) { + EXPECT_EQ(vec[i], i + 1); + } + } + vec.clear(); + // Debug + float free_gpu_memory_percent; + std::tie(std::ignore, free_gpu_memory_percent) = getFreeGPUMemory(); + std::cout << "Percentage free: " << free_gpu_memory_percent << "%" + << std::endl; + } + int end_free_gpu_memory_mb; + std::tie(end_free_gpu_memory_mb, std::ignore) = getFreeGPUMemory(); + + // Check that there's approximately the same amount left over at the end. + int reduction_in_free_gpu_memory_mb = + start_free_gpu_memory_mb - end_free_gpu_memory_mb; + std::cout << "(Int) Memory difference: " << reduction_in_free_gpu_memory_mb + << std::endl; + + // Check that memory isn't depleting. + // NOTE(alexmillane): We don't know what else is going on on the GPU but + // hopefully nothing that allocates 100 MB during this test run... + constexpr int kMaxAllowableMemoryReductionMb = 100; + // EXPECT_LT(reduction_in_free_gpu_memory_mb, kMaxAllowableMemoryReductionMb); +} + +TEST(MemoryLeakTest, 3DMatchMeshing) { + // Load 3dmatch image + const std::string base_path = "data/3dmatch"; + constexpr int seq_id = 1; + DepthImage depth_image_1; + ColorImage color_image_1; + EXPECT_TRUE(datasets::load16BitDepthImage( + datasets::threedmatch::getPathForDepthImage(base_path, seq_id, 0), + &depth_image_1)); + EXPECT_TRUE(datasets::load8BitColorImage( + datasets::threedmatch::getPathForColorImage(base_path, seq_id, 0), + &color_image_1)); + EXPECT_EQ(depth_image_1.width(), color_image_1.width()); + EXPECT_EQ(depth_image_1.height(), color_image_1.height()); + + // Parse 3x3 camera intrinsics matrix from 3D Match format: space-separated.
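+ // (For reference, the expected pinhole layout is the standard + // K = [fu 0 cu; 0 fv cv; 0 0 1], + // which Camera::fromIntrinsicsMatrix() consumes below.)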
+  Eigen::Matrix3f camera_intrinsic_matrix;
+  EXPECT_TRUE(datasets::threedmatch::parseCameraFromFile(
+      datasets::threedmatch::getPathForCameraIntrinsics(base_path),
+      &camera_intrinsic_matrix));
+  const auto camera = Camera::fromIntrinsicsMatrix(
+      camera_intrinsic_matrix, depth_image_1.width(), depth_image_1.height());
+
+  // Params
+  constexpr float kVoxelSizeM = 0.05f;
+  const float kBlockSizeM =
+      VoxelBlock<TsdfVoxel>::kVoxelsPerSide * kVoxelSizeM;
+  ProjectiveTsdfIntegrator tsdf_integrator;
+
+  // Memory start
+  int start_free_gpu_memory_mb;
+  std::tie(start_free_gpu_memory_mb, std::ignore) = getFreeGPUMemory();
+
+  constexpr int kNumRounds = 100;
+  // Generate a mesh
+  MeshIntegrator mesh_integrator;
+  MeshLayer mesh_layer_gpu(kBlockSizeM, MemoryType::kUnified);
+  for (int i = 0; i < kNumRounds; i++) {
+    std::cout << "i: " << i << std::endl;
+    // Integrate depth
+    TsdfLayer tsdf_layer(kVoxelSizeM, MemoryType::kDevice);
+    tsdf_integrator.integrateFrame(depth_image_1, Transform::Identity(),
+                                   camera, &tsdf_layer);
+
+    EXPECT_TRUE(mesh_integrator.integrateMeshFromDistanceField(
+        tsdf_layer, &mesh_layer_gpu));
+    tsdf_layer.clear();
+    mesh_layer_gpu.clear();
+    // Debug
+    float free_gpu_memory_percent;
+    std::tie(std::ignore, free_gpu_memory_percent) = getFreeGPUMemory();
+    std::cout << "Percentage free: " << free_gpu_memory_percent << "%"
+              << std::endl;
+  }
+  int end_free_gpu_memory_mb;
+  std::tie(end_free_gpu_memory_mb, std::ignore) = getFreeGPUMemory();
+
+  // Check that approximately the same amount is left over at the end.
+  int reduction_in_free_gpu_memory_mb =
+      start_free_gpu_memory_mb - end_free_gpu_memory_mb;
+  std::cout << "(3D Matching) Memory difference: "
+            << reduction_in_free_gpu_memory_mb << std::endl;
+
+  // Check that memory isn't depleting.
+  // NOTE(alexmillane): We don't know what else is going on on the GPU, but
+  // hopefully nothing that allocates 100 MB during this test run...
+  constexpr int kMaxAllowableMemoryReductionMb = 100;
+  // EXPECT_LT(reduction_in_free_gpu_memory_mb, kMaxAllowableMemoryReductionMb);
+}
+
+int main(int argc, char** argv) {
+  FLAGS_alsologtostderr = true;
+  google::InitGoogleLogging(argv[0]);
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
\ No newline at end of file
diff --git a/nvblox/tests/test_frustum.cpp b/nvblox/tests/test_frustum.cpp
new file mode 100644
index 00000000..ecad5e31
--- /dev/null
+++ b/nvblox/tests/test_frustum.cpp
@@ -0,0 +1,463 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+#include <algorithm>
+
+#include "nvblox/core/cuda/warmup.h"
+#include "nvblox/core/indexing.h"
+#include "nvblox/core/layer.h"
+#include "nvblox/core/types.h"
+#include "nvblox/core/voxels.h"
+#include "nvblox/datasets/image_loader.h"
+#include "nvblox/datasets/parse_3dmatch.h"
+#include "nvblox/integrators/frustum.h"
+#include "nvblox/integrators/ray_caster.h"
+#include "nvblox/io/pointcloud_io.h"
+#include "nvblox/primitives/scene.h"
+#include "nvblox/utils/timing.h"
+
+using namespace nvblox;
+
+class FrustumTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    timing::Timing::Reset();
+    std::srand(0);
+    block_size_ = VoxelBlock<TsdfVoxel>::kVoxelsPerSide * voxel_size_;
+
+    // Make the scene 6x6x3 meters big.
+    scene_.aabb() = AxisAlignedBoundingBox(Vector3f(-3.0f, -3.0f, 0.0f),
+                                           Vector3f(3.0f, 3.0f, 3.0f));
+
+    // Arbitrary camera
+    constexpr float fu = 300;
+    constexpr float fv = 300;
+    constexpr int width = 640;
+    constexpr int height = 480;
+    constexpr float cu = static_cast<float>(width) / 2.0f;
+    constexpr float cv = static_cast<float>(height) / 2.0f;
+    camera_.reset(new Camera(fu, fv, cu, cv, width, height));
+
+    base_path_ = "./data/3dmatch/";
+  }
+
+  static constexpr float kFloatEpsilon = 1e-4;
+
+  float block_size_;
+  float voxel_size_ = 0.05;
+
+  // A simulation scene.
+  primitives::Scene scene_;
+
+  // A simulation camera.
+  std::unique_ptr<Camera> camera_;
+
+  // Base path for 3D Match dataset.
+  std::string base_path_;
+
+  FrustumCalculator frustum_;
+};
+
+TEST_F(FrustumTest, FarPlaneImageTest) {
+  // We create a scene that is a flat plane 10 meters from the origin.
+  constexpr float kPlaneDistance = 10.0f;
+  float max_distance = kPlaneDistance;
+  scene_.addPrimitive(std::make_unique<primitives::Plane>(
+      Vector3f(kPlaneDistance, 0.0, 0.0), Vector3f(-1, 0, 0)));
+
+  // Create a pose at the origin looking forward.
+  Eigen::Quaternionf rotation_base(0.5, 0.5, 0.5, 0.5);
+  Transform T_S_C = Transform::Identity();
+  T_S_C.prerotate(rotation_base);
+
+  // Generate a depth frame with max distance == plane distance.
+  DepthImage depth_frame(camera_->height(), camera_->width(),
+                         MemoryType::kUnified);
+  scene_.generateDepthImageFromScene(*camera_, T_S_C, 2 * kPlaneDistance,
+                                     &depth_frame);
+
+  // Figure out what the GT should be.
+  std::vector<Index3D> blocks_in_view = FrustumCalculator::getBlocksInView(
+      T_S_C, *camera_, block_size_, max_distance);
+  std::vector<Index3D> blocks_in_image_view =
+      FrustumCalculator::getBlocksInImageView(depth_frame, T_S_C, *camera_,
+                                              block_size_, 0.0f, max_distance);
+  EXPECT_EQ(blocks_in_view.size(), blocks_in_image_view.size());
+
+  // Now get the actual thing to test.
+  std::vector<Index3D> blocks_in_cuda_view = frustum_.getBlocksInImageViewCuda(
+      depth_frame, T_S_C, *camera_, block_size_, 0.0f,
+      max_distance + kFloatEpsilon);
+
+  // Sort all of the entries.
+  std::sort(blocks_in_view.begin(), blocks_in_view.end(),
+            VectorCompare<Index3D>());
+  std::sort(blocks_in_image_view.begin(), blocks_in_image_view.end(),
+            VectorCompare<Index3D>());
+  std::sort(blocks_in_cuda_view.begin(), blocks_in_cuda_view.end(),
+            VectorCompare<Index3D>());
+
+  size_t min_size =
+      std::min(blocks_in_cuda_view.size(), blocks_in_image_view.size());
+  for (size_t i = 0; i < min_size; i++) {
+    EXPECT_EQ(blocks_in_view[i], blocks_in_image_view[i]);
+  }
+
+  // We will now raycast through every single pixel in the original image.
+  for (int u = 0; u < camera_->rows(); u++) {
+    for (int v = 0; v < camera_->cols(); v++) {
+      // Get the depth at this image point.
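+      // (Note the indexing convention here: images are accessed as
+      // (row, column), so u iterates rows and v columns, while Index2D takes
+      // (col, row).)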
+      float depth = depth_frame(u, v);
+      Vector3f p_C = depth * camera_->rayFromPixelIndices(Index2D(v, u));
+      Vector3f p_L = T_S_C * p_C;
+
+      Index3D block_index = getBlockIndexFromPositionInLayer(block_size_, p_L);
+
+      EXPECT_TRUE(std::binary_search(blocks_in_cuda_view.begin(),
+                                     blocks_in_cuda_view.end(), block_index,
+                                     VectorCompare<Index3D>()))
+          << block_index;
+      // Now raycast back to the camera origin, checking that every block
+      // along the ray is present.
+      RayCaster raycaster(T_S_C.translation() / block_size_,
+                          p_L / block_size_);
+      Index3D ray_index = Index3D::Zero();
+      while (raycaster.nextRayIndex(&ray_index)) {
+        EXPECT_TRUE(std::binary_search(blocks_in_cuda_view.begin(),
+                                       blocks_in_cuda_view.end(), ray_index,
+                                       VectorCompare<Index3D>()))
+            << ray_index;
+      }
+    }
+  }
+
+  std::cout << timing::Timing::Print();
+}
+
+TEST_F(FrustumTest, PlaneWithGround) {
+  // We create a scene that is a flat plane 10 meters from the origin.
+  constexpr float kPlaneDistance = 10.0f;
+  float max_distance = kPlaneDistance;
+  scene_.addPrimitive(std::make_unique<primitives::Plane>(
+      Vector3f(kPlaneDistance, 0.0, 0.0), Vector3f(-1, 0, 0)));
+  scene_.addGroundLevel(-1.0f);
+
+  // Create a pose at the origin looking forward.
+  Eigen::Quaternionf rotation_base(0.5, 0.5, 0.5, 0.5);
+  Transform T_S_C = Transform::Identity();
+  T_S_C.prerotate(rotation_base);
+
+  // Generate a depth frame with max distance == plane distance.
+  DepthImage depth_frame(camera_->height(), camera_->width(),
+                         MemoryType::kUnified);
+  scene_.generateDepthImageFromScene(*camera_, T_S_C, 2 * kPlaneDistance,
+                                     &depth_frame);
+
+  // Figure out what the GT should be.
+  timing::Timer blocks_in_view_timer("blocks_in_view");
+  std::vector<Index3D> blocks_in_view = FrustumCalculator::getBlocksInView(
+      T_S_C, *camera_, block_size_, max_distance);
+  blocks_in_view_timer.Stop();
+  timing::Timer blocks_in_image_view_timer("blocks_in_image_view");
+  std::vector<Index3D> blocks_in_image_view =
+      FrustumCalculator::getBlocksInImageView(depth_frame, T_S_C, *camera_,
+                                              block_size_, 0.0f, max_distance);
+  blocks_in_image_view_timer.Stop();
+
+  EXPECT_EQ(blocks_in_view.size(), blocks_in_image_view.size());
+
+  // Now get the actual thing to test.
+  timing::Timer blocks_in_cuda_view_timer("blocks_in_cuda_view");
+  std::vector<Index3D> blocks_in_cuda_view = frustum_.getBlocksInImageViewCuda(
+      depth_frame, T_S_C, *camera_, block_size_, 0.0f,
+      max_distance + kFloatEpsilon);
+  EXPECT_LT(blocks_in_cuda_view.size(), blocks_in_image_view.size());
+  blocks_in_cuda_view_timer.Stop();
+
+  // Sort all of the entries.
+  std::sort(blocks_in_view.begin(), blocks_in_view.end(),
+            VectorCompare<Index3D>());
+  std::sort(blocks_in_image_view.begin(), blocks_in_image_view.end(),
+            VectorCompare<Index3D>());
+  std::sort(blocks_in_cuda_view.begin(), blocks_in_cuda_view.end(),
+            VectorCompare<Index3D>());
+
+  for (size_t i = 0; i < blocks_in_image_view.size(); i++) {
+    EXPECT_EQ(blocks_in_view[i], blocks_in_image_view[i]);
+  }
+
+  // Ok now the hard part. We expect the raycast to EVERY PIXEL to succeed
+  // in only going through allocated blocks.
+  // We make this easy by just making a TSDF layer.
+  TsdfLayer tsdf_layer(voxel_size_, nvblox::MemoryType::kUnified);
+  TsdfLayer tsdf_layer_cuda(voxel_size_, nvblox::MemoryType::kUnified);
+
+  for (const Index3D& block_index : blocks_in_cuda_view) {
+    TsdfBlock::Ptr block = tsdf_layer_cuda.allocateBlockAtIndex(block_index);
+    for (int x = 0; x < TsdfBlock::kVoxelsPerSide; x++) {
+      for (int y = 0; y < TsdfBlock::kVoxelsPerSide; y++) {
+        for (int z = 0; z < TsdfBlock::kVoxelsPerSide; z++) {
+          block->voxels[x][y][z].weight = 1;
+        }
+      }
+    }
+  }
+
+  // We will now raycast through every single pixel in the original image.
+  for (int u = 0; u < camera_->rows(); u++) {
+    for (int v = 0; v < camera_->cols(); v++) {
+      // Get the depth at this image point.
+      float depth = depth_frame(u, v);
+      Vector3f p_C = depth * camera_->rayFromPixelIndices(Index2D(v, u));
+      Vector3f p_L = T_S_C * p_C;
+
+      Index3D block_index = getBlockIndexFromPositionInLayer(block_size_, p_L);
+
+      EXPECT_TRUE(tsdf_layer_cuda.isBlockAllocated(block_index))
+          << block_index;
+
+      // Now raycast back to the camera origin, collecting any blocks that
+      // are unexpectedly missing.
+      RayCaster raycaster(T_S_C.translation() / block_size_,
+                          p_L / block_size_);
+      Index3D ray_index = Index3D::Zero();
+      while (raycaster.nextRayIndex(&ray_index)) {
+        EXPECT_TRUE(tsdf_layer_cuda.isBlockAllocated(ray_index)) << ray_index;
+
+        if (!tsdf_layer_cuda.isBlockAllocated(ray_index)) {
+          TsdfBlock::Ptr block = tsdf_layer.allocateBlockAtIndex(block_index);
+          for (int x = 0; x < TsdfBlock::kVoxelsPerSide; x++) {
+            for (int y = 0; y < TsdfBlock::kVoxelsPerSide; y++) {
+              for (int z = 0; z < TsdfBlock::kVoxelsPerSide; z++) {
+                block->voxels[x][y][z].weight = 1;
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  bool output = false;
+  if (output) {
+    for (const Index3D& block_index : blocks_in_image_view) {
+      TsdfBlock::Ptr block = tsdf_layer.allocateBlockAtIndex(block_index);
+      for (int x = 0; x < TsdfBlock::kVoxelsPerSide; x++) {
+        for (int y = 0; y < TsdfBlock::kVoxelsPerSide; y++) {
+          for (int z = 0; z < TsdfBlock::kVoxelsPerSide; z++) {
+            block->voxels[x][y][z].weight = 1;
+          }
+        }
+      }
+    }
+
+    io::writeToCsv("test_frustum_image.csv", depth_frame);
+
+    io::outputVoxelLayerToPly(tsdf_layer, "test_frustum_blocks_image.ply");
+    io::outputVoxelLayerToPly(tsdf_layer_cuda, "test_frustum_blocks_cuda.ply");
+  }
+  std::cout << timing::Timing::Print();
+}
+
+// TODO(helen): fix this test with frustum checking.
+TEST_F(FrustumTest, BlocksInView) {
+  // Arranging a situation where we have a predictable number of blocks in
+  // view. We choose a situation with a single z-column of blocks in view by
+  // making a camera with 4 pixels viewing the furthest blocks' corners.
+
+  constexpr float kBlockSize = 1.0f;
+
+  // Let's touch a column of 10 blocks in the z direction.
+  constexpr float kDistanceToBlockCenters = 9.5f;
+
+  // Design a camera that just views the far blocks' outer corners.
+  constexpr float cu = 1.0;
+  constexpr float cv = 1.0;
+  constexpr int width = 2;
+  constexpr int height = 2;
+  // Pixel locations touched by the blocks' upper-right corner.
+  // NOTE(alexmillane): we just slightly lengthen the focal length to not
+  // have boundary effects.
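+  // From the pinhole model u = fu * x / z + cu, a ray through pixel u hits
+  // x = kBlockSize / 2 at depth kDistanceToBlockCenters when
+  // fu = (u - cu) * kDistanceToBlockCenters / (kBlockSize / 2), which is the
+  // expression below (plus a small epsilon).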
+  constexpr float u = static_cast<float>(width);
+  constexpr float v = static_cast<float>(height);
+  constexpr float fu =
+      static_cast<float>(u - cu) * 2.0 * kDistanceToBlockCenters / kBlockSize +
+      0.01;
+  constexpr float fv =
+      static_cast<float>(v - cv) * 2.0 * kDistanceToBlockCenters / kBlockSize +
+      0.01;
+  Camera camera(fu, fv, cu, cv, width, height);
+
+  // Depth frame with 4 pixels, some pixels at the far block depth, some at
+  // the first.
+  DepthImage depth_frame(2, 2, MemoryType::kUnified);
+  depth_frame(0, 0) = kBlockSize / 2.0f;
+  depth_frame(1, 0) = kBlockSize / 2.0f;
+  depth_frame(0, 1) = kDistanceToBlockCenters;
+  depth_frame(1, 1) = kDistanceToBlockCenters;
+
+  // Camera looking down the z axis, sitting centered on a block center in x
+  // and y.
+  Transform T_L_C;
+  T_L_C = Eigen::Translation3f(0.5f, 0.5f, 0.0f);
+
+  // Get the block indices
+  constexpr float kMaxDist = 10.0f;
+  constexpr float kTruncationDistance = 0.0f;
+  const std::vector<Index3D> blocks_in_view =
+      FrustumCalculator::getBlocksInImageView(depth_frame, T_L_C, camera,
+                                              kBlockSize, kTruncationDistance,
+                                              kMaxDist);
+
+  EXPECT_EQ(blocks_in_view.size(), 10);
+  for (int i = 0; i < blocks_in_view.size(); i++) {
+    EXPECT_TRUE((blocks_in_view[i].array() == Index3D(0, 0, i).array()).all());
+  }
+}
+
+TEST_F(FrustumTest, ThreeDMatch) {
+  // Get the first frame, a camera, and a pose.
+  constexpr int kSequenceNum = 1;
+  constexpr int kFrameNumber = 0;
+  float max_distance = 10.0f;
+
+  std::unique_ptr<datasets::ImageLoader<DepthImage>> depth_image_loader =
+      datasets::threedmatch::createDepthImageLoader(base_path_, kSequenceNum);
+
+  // Get the first image.
+  DepthImage depth_frame;
+  ASSERT_TRUE(depth_image_loader->getNextImage(&depth_frame));
+
+  // Get the transform.
+  Transform T_L_C;
+  ASSERT_TRUE(datasets::threedmatch::parsePoseFromFile(
+      datasets::threedmatch::getPathForFramePose(base_path_, kSequenceNum,
+                                                 kFrameNumber),
+      &T_L_C));
+
+  // Create a camera object.
+  int image_width = depth_frame.cols();
+  int image_height = depth_frame.rows();
+  const std::string intrinsics_filename =
+      datasets::threedmatch::getPathForCameraIntrinsics(base_path_);
+  Eigen::Matrix3f camera_intrinsics;
+  ASSERT_TRUE(datasets::threedmatch::parseCameraFromFile(intrinsics_filename,
+                                                         &camera_intrinsics));
+  Camera camera = Camera::fromIntrinsicsMatrix(camera_intrinsics, image_width,
+                                               image_height);
+
+  for (int i = 0; i < 100; i++) {
+    // Now get the actual thing to test.
+    timing::Timer blocks_in_cuda_view_timer("blocks_in_cuda_view");
+    std::vector<Index3D> blocks_in_cuda_view =
+        frustum_.getBlocksInImageViewCuda(depth_frame, T_L_C, camera,
+                                          block_size_, 0.0f, max_distance);
+    blocks_in_cuda_view_timer.Stop();
+
+    // Figure out what the GT should be.
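+    // (Reference values: getBlocksInView uses the frustum only, with no
+    // depth, so it upper-bounds the depth-aware getBlocksInImageView result;
+    // both serve as baselines for the GPU raycast version timed above.)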
+    timing::Timer blocks_in_view_timer("blocks_in_view");
+    std::vector<Index3D> blocks_in_view = FrustumCalculator::getBlocksInView(
+        T_L_C, camera, block_size_, max_distance);
+    blocks_in_view_timer.Stop();
+    timing::Timer blocks_in_image_view_timer("blocks_in_image_view");
+    std::vector<Index3D> blocks_in_image_view =
+        FrustumCalculator::getBlocksInImageView(
+            depth_frame, T_L_C, camera, block_size_, 0.0f, max_distance);
+    blocks_in_image_view_timer.Stop();
+  }
+
+  std::cout << timing::Timing::Print();
+}
+
+class FrustumRayTracingSubsamplingTest
+    : public FrustumTest,
+      public ::testing::WithParamInterface<unsigned int> {
+ protected:
+  // Yo dawg I heard you like params
+};
+
+TEST_P(FrustumRayTracingSubsamplingTest, RayTracePixels) {
+  // Arranging a situation where we have a predictable number of blocks in
+  // view.
+  // |--|--|
+  // |--|--|
+  // |--|--|
+  // |--|--|
+  //  \    /
+  //   * --camera
+  constexpr float kBlockSize = 1.0f;
+
+  // Let's touch a 2x2x3 column of blocks in the z direction.
+  constexpr float kDistanceToBlockCenters = 2.5f;
+
+  // Design a camera that just views the far blocks' outer corners.
+  constexpr int width = 3;
+  constexpr int height = 3;
+  constexpr float cu = static_cast<float>(width - 1) / 2.0f;
+  constexpr float cv = static_cast<float>(height - 1) / 2.0f;
+  // Calculate focal lengths such that the extreme pixels shoot rays through
+  // the back blocks' centers.
+  constexpr float u = static_cast<float>(width - 1);
+  constexpr float v = static_cast<float>(height - 1);
+  constexpr float fu = static_cast<float>(u - cu) * kDistanceToBlockCenters /
+                       (0.5f * kBlockSize);
+  constexpr float fv = static_cast<float>(v - cv) * kDistanceToBlockCenters /
+                       (0.5f * kBlockSize);
+  Camera camera(fu, fv, cu, cv, width, height);
+
+  // Depth frame with all pixels at the far blocks' center depth.
+  DepthImage depth_frame(height, width, MemoryType::kUnified);
+  for (int lin_idx = 0; lin_idx < depth_frame.numel(); lin_idx++) {
+    depth_frame(lin_idx) = kDistanceToBlockCenters;
+  }
+
+  // Camera looking down the z axis, sitting between blocks in the x and y
+  // dimensions.
+  Transform T_L_C;
+  T_L_C = Eigen::Translation3f(1.0f, 1.0f, 0.0f);
+
+  FrustumCalculator frustum_calculator;
+
+  unsigned int raycast_subsampling_factor = GetParam();
+  frustum_calculator.raycast_subsampling_factor(raycast_subsampling_factor);
+
+  const std::vector<Index3D> blocks_in_view =
+      frustum_calculator.getBlocksInImageViewCuda(
+          depth_frame, T_L_C, camera, kBlockSize, 0.0,
+          kDistanceToBlockCenters + 1.0f);
+
+  std::for_each(blocks_in_view.begin(), blocks_in_view.end(),
+                [](const auto& block_idx) {
+                  EXPECT_TRUE(block_idx.x() == 0 || block_idx.x() == 1);
+                  EXPECT_TRUE(block_idx.y() == 0 || block_idx.y() == 1);
+                  EXPECT_TRUE(block_idx.z() == 0 || block_idx.z() == 1 ||
+                              block_idx.z() == 2);
+                });
+
+  // 2 x 2 x 3 block volume
+  EXPECT_EQ(blocks_in_view.size(), 12);
+}
+
+INSTANTIATE_TEST_CASE_P(FrustumTest, FrustumRayTracingSubsamplingTest,
+                        ::testing::Values(1, 2));
+
+int main(int argc, char** argv) {
+  warmupCuda();
+  google::InitGoogleLogging(argv[0]);
+  FLAGS_alsologtostderr = true;
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/nvblox/tests/test_gpu_layer_view.cpp b/nvblox/tests/test_gpu_layer_view.cpp
new file mode 100644
index 00000000..ac2b11f0
--- /dev/null
+++ b/nvblox/tests/test_gpu_layer_view.cpp
@@ -0,0 +1,201 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+
+#include "nvblox/core/accessors.h"
+#include "nvblox/core/blox.h"
+#include "nvblox/core/common_names.h"
+#include "nvblox/core/layer.h"
+#include "nvblox/core/voxels.h"
+
+#include "nvblox/gpu_hash/gpu_layer_view.h"
+
+#include "nvblox/primitives/primitives.h"
+#include "nvblox/primitives/scene.h"
+
+#include "nvblox/tests/gpu_layer_utils.h"
+#include "nvblox/tests/utils.h"
+
+using namespace nvblox;
+
+TEST(GpuHashTest, CopyOverLayer) {
+  // Indices to insert in the layer
+  std::vector<Index3D> block_indices;
+  block_indices.push_back({0, 0, 0});
+  block_indices.push_back({1, 0, 0});
+  block_indices.push_back({2, 0, 0});
+
+  // Create a layer and allocate blocks at the indices above.
+  const float voxel_size = 0.1f;
+  TsdfLayer tsdf_layer(voxel_size, MemoryType::kUnified);
+  std::for_each(block_indices.begin(), block_indices.end(),
+                [&tsdf_layer](const Index3D& idx) {
+                  tsdf_layer.allocateBlockAtIndex(idx);
+                });
+
+  // Copy block hash to GPU
+  GPULayerView<TsdfLayer> gpu_layer = tsdf_layer.getGpuLayerView();
+
+  // Search for indices on the GPU
+  block_indices.push_back({3, 0, 0});
+  const std::vector<bool> flags =
+      test_utils::getContainsFlags(gpu_layer, block_indices);
+
+  EXPECT_EQ(flags.size(), block_indices.size());
+  EXPECT_TRUE(flags[0]);
+  EXPECT_TRUE(flags[1]);
+  EXPECT_TRUE(flags[2]);
+  EXPECT_FALSE(flags[3]);
+}
+
+TEST(GpuHashTest, SphereSceneAccessTest) {
+  // Sphere in a box scene.
+  primitives::Scene scene;
+  const Vector3f bounds_min = Vector3f(-5.0f, -5.0f, 0.0f);
+  const Vector3f bounds_max = Vector3f(5.0f, 5.0f, 5.0f);
+  scene.aabb() = AxisAlignedBoundingBox(bounds_min, bounds_max);
+  scene.addGroundLevel(0.0f);
+  scene.addCeiling(5.0f);
+  scene.addPrimitive(
+      std::make_unique<primitives::Sphere>(Vector3f(0.0f, 0.0f, 2.0f), 2.0f));
+  scene.addPlaneBoundaries(-5.0f, 5.0f, -5.0f, 5.0f);
+
+  // Layer
+  constexpr float kVoxelSize = 0.1f;
+  TsdfLayer tsdf_layer(kVoxelSize, MemoryType::kUnified);
+
+  // Generate SDF
+  constexpr float kMaxDistance = 5.0f;
+  scene.generateSdfFromScene(kMaxDistance, &tsdf_layer);
+
+  // Get some random points in the scene
+  constexpr int kNumPoints = 1000;
+  std::vector<Vector3f> p_L_vec(kNumPoints);
+  std::generate(p_L_vec.begin(), p_L_vec.end(), [&bounds_min, &bounds_max]() {
+    return test_utils::getRandomVector3fInRange(bounds_min, bounds_max);
+  });
+
+  // Lookup voxels (CPU)
+  std::vector<TsdfVoxel> cpu_lookup_voxels;
+  cpu_lookup_voxels.reserve(p_L_vec.size());
+  for (const Vector3f& p_L : p_L_vec) {
+    // TODO(alexmillane): Let's also add this as a member function
+    const TsdfVoxel* voxel_ptr;
+    EXPECT_TRUE(getVoxelAtPosition(tsdf_layer, p_L, &voxel_ptr));
+    cpu_lookup_voxels.push_back(*voxel_ptr);
+  }
+
+  // CPU -> GPU
+  GPULayerView<TsdfLayer> gpu_layer = tsdf_layer.getGpuLayerView();
+
+  // Lookup voxels (GPU)
+  std::vector<TsdfVoxel> gpu_lookup_voxels;
+  std::vector<bool> flags;
+  std::tie(gpu_lookup_voxels, flags) =
+      test_utils::getVoxelsAtPositionsOnGPU(gpu_layer, p_L_vec);
+
+  std::for_each(flags.begin(), flags.end(),
+                [](bool flag) { EXPECT_TRUE(flag); });
+
+  CHECK_EQ(cpu_lookup_voxels.size(), gpu_lookup_voxels.size());
+  for (int i = 0; i < gpu_lookup_voxels.size(); i++) {
+    constexpr float eps = 1e-6;
+    EXPECT_NEAR(cpu_lookup_voxels[i].distance, gpu_lookup_voxels[i].distance,
+                eps);
+  }
+
+  // Get some points OUTSIDE the scene
+  LOG(INFO) << "Testing points outside the scene.";
+  std::generate(p_L_vec.begin(), p_L_vec.end(), [&bounds_max]() {
+    return test_utils::getRandomVector3fInRange(bounds_max.array() + 1.0,
+                                                bounds_max.array() + 5.0);
+  });
+  std::tie(gpu_lookup_voxels, flags) =
+      test_utils::getVoxelsAtPositionsOnGPU(gpu_layer, p_L_vec);
+  std::for_each(flags.begin(), flags.end(),
+                [](bool flag) { EXPECT_FALSE(flag); });
+}
+
+TEST(GpuHashTest, ResizeTest) {
+  // Indices to insert in the layer
+  std::vector<Index3D> block_indices;
+  block_indices.push_back({0, 0, 0});
+  block_indices.push_back({1, 0, 0});
+  block_indices.push_back({2, 0, 0});
+
+  // Create a layer with three blocks
+  const float voxel_size = 0.1f;
+  TsdfLayer tsdf_layer(voxel_size, MemoryType::kUnified);
+  std::for_each(block_indices.begin(), block_indices.end(),
+                [&tsdf_layer](const Index3D& idx) {
+                  tsdf_layer.allocateBlockAtIndex(idx);
+                });
+
+  // Push the layer into an undersized layer view and check it expands to fit.
+  GPULayerView<TsdfLayer> gpu_layer_view(1);
+  gpu_layer_view.reset(&tsdf_layer);
+  EXPECT_GT(gpu_layer_view.size(), 1);
+}
+
+TEST(GpuHashTest, LoadFactorTest) {
+  // Create a layer with 1000 blocks
+  constexpr size_t num_blocks = 1000;
+  std::vector<Index3D> block_indices;
+  for (int i = 0; i < num_blocks; i++) {
+    block_indices.push_back({i, 0, 0});
+  }
+
+  // Create a layer
+  const float voxel_size = 0.1f;
+  TsdfLayer tsdf_layer(voxel_size, MemoryType::kUnified);
+  std::for_each(block_indices.begin(), block_indices.end(),
+                [&tsdf_layer](const Index3D& idx) {
+                  tsdf_layer.allocateBlockAtIndex(idx);
+                });
+
+  // Allocate a GPULayerView that can *just* hold the layer without exceeding
+  // the max load factor.
+  GPULayerView<TsdfLayer> gpu_layer(1);
+  const size_t initial_gpu_layer_size =
+      std::ceil(static_cast<float>(num_blocks) / gpu_layer.max_load_factor());
+  gpu_layer = GPULayerView<TsdfLayer>(initial_gpu_layer_size);
+
+  // Send in the blocks
+  gpu_layer.reset(&tsdf_layer);
+
+  // Check it hasn't expanded
+  EXPECT_EQ(gpu_layer.size(), initial_gpu_layer_size);
+
+  // Add another block
+  tsdf_layer.allocateBlockAtIndex({0, 0, 1});
+
+  // Regenerate the layer view
+  gpu_layer.reset(&tsdf_layer);
+
+  // Check it's expanded (by the right amount)
+  EXPECT_GT(gpu_layer.size(), initial_gpu_layer_size);
+  EXPECT_NEAR(static_cast<float>(gpu_layer.size()) /
+                  static_cast<float>(initial_gpu_layer_size),
+              gpu_layer.size_expansion_factor(), 0.01);
+}
+
+int main(int argc, char** argv) {
+  FLAGS_alsologtostderr = true;
+  google::InitGoogleLogging(argv[0]);
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/nvblox/tests/test_indexing.cpp b/nvblox/tests/test_indexing.cpp
new file mode 100644
index 00000000..e0be4e96
--- /dev/null
+++ b/nvblox/tests/test_indexing.cpp
@@ -0,0 +1,151 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+
+#include "nvblox/core/indexing.h"
+#include "nvblox/core/layer.h"
+#include "nvblox/core/types.h"
+#include "nvblox/core/voxels.h"
+
+#include "nvblox/tests/gpu_indexing.h"
+#include "nvblox/tests/utils.h"
+
+using namespace nvblox;
+
+constexpr float kFloatEpsilon = 1e-4;
+
+class IndexingTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    std::srand(0);
+    block_size_ = VoxelBlock<TsdfVoxel>::kVoxelsPerSide * voxel_size_;
+  }
+
+  float block_size_;
+  float voxel_size_ = 0.05;
+};
+
+TEST_F(IndexingTest, GetVoxelIndex) {
+  ASSERT_NEAR(voxel_size_, 0.05, kFloatEpsilon);
+  ASSERT_NEAR(block_size_, 0.4, kFloatEpsilon);
+
+  Vector3f point(0, 0, 0);
+  Index3D voxel_index = getVoxelIndexFromPositionInLayer(block_size_, point);
+  EXPECT_TRUE(voxel_index == Index3D(0, 0, 0));
+
+  point = Vector3f(block_size_, block_size_, block_size_);
+  voxel_index = getVoxelIndexFromPositionInLayer(block_size_, point);
+  EXPECT_TRUE(voxel_index == Index3D(0, 0, 0));
+
+  point = Vector3f(2.0f, 1.0f, 3.0f);
+  voxel_index = getVoxelIndexFromPositionInLayer(block_size_, point);
+  EXPECT_TRUE(voxel_index == Index3D(0, 4, 4)) << voxel_index;
+
+  point = Vector3f(-2.0f, -1.0f, -3.0f);
+  voxel_index = getVoxelIndexFromPositionInLayer(block_size_, point);
+  EXPECT_TRUE(voxel_index == Index3D(0, 4, 4)) << voxel_index;
+}
+
+TEST_F(IndexingTest, CenterIndexing) {
+  AlignedVector<Vector3f> test_points;
+  test_points.push_back({0, 0, 0});
+  test_points.push_back({0.05, 0.05, 0.05});
+  test_points.push_back({0.025, 0.025, 0.025});
+  test_points.push_back({-1, -3, 2});
+
+  for (const Vector3f& point : test_points) {
+    Index3D voxel_index = getVoxelIndexFromPositionInLayer(block_size_, point);
+    Index3D block_index = getBlockIndexFromPositionInLayer(block_size_, point);
+
+    Vector3f reconstructed_point = getPositionFromBlockIndexAndVoxelIndex(
+        block_size_, block_index, voxel_index);
+    Vector3f reconstructed_center_point =
+        getCenterPostionFromBlockIndexAndVoxelIndex(block_size_, block_index,
+                                                    voxel_index);
+
+    // Check the difference between the corner and the center.
+    Vector3f center_difference =
+        reconstructed_center_point - reconstructed_point;
+
+    // Needs to be within voxel size.
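+    // (The corner-based reconstruction floors the position to the voxel
+    // grid, so any sub-voxel offset is lost; hence the voxel_size_ bound
+    // checked below.)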
+    EXPECT_LT((reconstructed_point - point).cwiseAbs().maxCoeff(),
+              voxel_size_);
+    EXPECT_LT((reconstructed_center_point - point).cwiseAbs().maxCoeff(),
+              voxel_size_);
+    EXPECT_TRUE(center_difference.isApprox(
+        Vector3f(voxel_size_ / 2.0f, voxel_size_ / 2.0f, voxel_size_ / 2.0f),
+        kFloatEpsilon));
+  }
+}
+
+TEST_F(IndexingTest, getBlockAndVoxelIndexFromPositionInLayer) {
+  constexpr int kNumTests = 1000;
+  for (int i = 0; i < kNumTests; i++) {
+    // Random block and voxel indices
+    constexpr int kRandomBlockIndexRange = 1000;
+    const Index3D block_index = test_utils::getRandomIndex3dInRange(
+        -kRandomBlockIndexRange, kRandomBlockIndexRange);
+    const Index3D voxel_index = test_utils::getRandomIndex3dInRange(
+        0, VoxelBlock<TsdfVoxel>::kVoxelsPerSide - 1);
+
+    // 3D point from indices, including sub-voxel randomness
+    constexpr float voxel_size = 0.1;
+    constexpr float block_size =
+        VoxelBlock<TsdfVoxel>::kVoxelsPerSide * voxel_size;
+    const Vector3f delta =
+        test_utils::getRandomVector3fInRange(0.0f, voxel_size);
+    const Vector3f p_L = (block_index.cast<float>() * block_size) +
+                         (voxel_index.cast<float>() * voxel_size) + delta;
+
+    // Point back to voxel indices
+    Index3D block_index_check;
+    Index3D voxel_index_check;
+    getBlockAndVoxelIndexFromPositionInLayer(
+        block_size, p_L, &block_index_check, &voxel_index_check);
+
+    // Check we get out what we put in
+    EXPECT_EQ(block_index_check.x(), block_index.x());
+    EXPECT_EQ(block_index_check.y(), block_index.y());
+    EXPECT_EQ(block_index_check.z(), block_index.z());
+  }
+}
+
+TEST_F(IndexingTest, getBlockAndVoxelIndexFromPositionInLayerRoundingErrors) {
+  constexpr int kNumTests = 1e7;
+  std::vector<Vector3f> positions;
+  positions.reserve(kNumTests);
+  for (int i = 0; i < kNumTests; i++) {
+    positions.push_back(Vector3f::Random());
+  }
+
+  std::vector<Index3D> block_indices;
+  std::vector<Index3D> voxel_indices;
+  constexpr float kBlockSize = 0.1;
+  test_utils::getBlockAndVoxelIndexFromPositionInLayerOnGPU(
+      kBlockSize, positions, &block_indices, &voxel_indices);
+
+  for (int i = 0; i < kNumTests; i++) {
+    EXPECT_TRUE((voxel_indices[i].array() >= 0).all());
+    EXPECT_TRUE(
+        (voxel_indices[i].array() < VoxelBlock<TsdfVoxel>::kVoxelsPerSide)
+            .all());
+  }
+}
+
+int main(int argc, char** argv) {
+  google::InitGoogleLogging(argv[0]);
+  FLAGS_alsologtostderr = true;
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/nvblox/tests/test_layer.cpp b/nvblox/tests/test_layer.cpp
new file mode 100644
index 00000000..5eb4719a
--- /dev/null
+++ b/nvblox/tests/test_layer.cpp
@@ -0,0 +1,208 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+
+#include "nvblox/core/common_names.h"
+#include "nvblox/core/layer.h"
+#include "nvblox/core/types.h"
+#include "nvblox/core/voxels.h"
+
+#include "nvblox/tests/blox.h"
+#include "nvblox/tests/blox_utils.h"
+#include "nvblox/tests/utils.h"
+
+using namespace nvblox;
+
+Index3D getRandomIndex3DInRange(const int min, const int max) {
+  return Index3D(test_utils::randomIntInRange(min, max),
+                 test_utils::randomIntInRange(min, max),
+                 test_utils::randomIntInRange(min, max));
+}
+
+TEST(LayerTest, InsertionAndRetrieval) {
+  // Make sure this is deterministic.
+  std::srand(0);
+
+  // Empty layer
+  constexpr float KTestBlockSize = 0.1;
+  BlockLayer<IndexBlock> layer(KTestBlockSize, MemoryType::kUnified);
+
+  // Get some distinct block locations
+  constexpr int kNumTestItems = 100;
+  constexpr int kMinimumIndexValue = -100;
+  constexpr int kMaximumIndexValue = 100;
+
+  Index3DSet unique_block_indices;
+  for (int i = 0; i < kNumTestItems; i++) {
+    while (!unique_block_indices
+                .insert(getRandomIndex3DInRange(kMinimumIndexValue,
+                                                kMaximumIndexValue))
+                .second) {
+      continue;
+    }
+  }
+
+  // Allocate blocks at these indices (and store the index in each block for
+  // checking later).
+  for (auto it = unique_block_indices.begin();
+       it != unique_block_indices.end(); it++) {
+    EXPECT_FALSE(layer.getBlockAtIndex(*it));
+    IndexBlock::Ptr block_ptr = layer.allocateBlockAtIndex(*it);
+    block_ptr->data = *it;
+  }
+
+  // Retrieve the blocks at these indices.
+  for (auto it = unique_block_indices.begin();
+       it != unique_block_indices.end(); it++) {
+    IndexBlock::ConstPtr block_ptr = layer.getBlockAtIndex(*it);
+    // Check it's there
+    EXPECT_NE(block_ptr, nullptr);
+    // Check it has the data we put in.
+    EXPECT_EQ(block_ptr->data, *it);
+  }
+}
+
+TEST(LayerTest, EmptyLayer) {
+  constexpr float KTestBlockSize = 0.1;
+  BlockLayer<IndexBlock> layer(KTestBlockSize, MemoryType::kUnified);
+
+  IndexBlock::Ptr index_block_ptr =
+      layer.getBlockAtIndex(Index3D(4, 50, -10));
+
+  EXPECT_FALSE(index_block_ptr);
+  EXPECT_EQ(index_block_ptr.get(), nullptr);
+}
+
+TEST(LayerTest, MinCornerBasedIndexing) {
+  // Check that indexing is performed with the block origin at the minimum
+  // corner, spanning block_size: e.g. from [0,0,0] to [1,1,1], exclusive on
+  // the top side.
+
+  constexpr float KTestBlockSize = 0.1;
+  BlockLayer<DummyBlock> layer(KTestBlockSize, MemoryType::kUnified);
+
+  const Vector3f kPostion3DEpsilson(0.001f, 0.001f, 0.001f);
+  Vector3f position_low(0, 0, 0);
+  Vector3f position_high =
+      KTestBlockSize * Vector3f::Ones() - kPostion3DEpsilson;
+
+  // Put something in on the low side of the block's range
+  DummyBlock::Ptr block_low_ptr = layer.allocateBlockAtPosition(position_low);
+  block_low_ptr->data = true;
+
+  // Check we get the same block back on the high side.
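+  // (Indexing floors position / block_size, so every point in
+  // [0, block_size) maps to block index 0 and this allocation is a no-op.)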
+  DummyBlock::ConstPtr block_high_ptr =
+      layer.allocateBlockAtPosition(position_high);
+  EXPECT_NE(block_high_ptr, nullptr);
+  EXPECT_EQ(block_low_ptr, block_high_ptr);
+  EXPECT_TRUE(block_high_ptr->data);
+}
+
+TEST(VoxelLayerTest, CopyVoxelsToHost) {
+  constexpr float voxel_size_m = 1.0;
+  TsdfLayer tsdf_layer(voxel_size_m, MemoryType::kDevice);
+  auto block_ptr = tsdf_layer.allocateBlockAtIndex(Index3D(0, 0, 0));
+
+  test_utils::setTsdfBlockVoxelsInSequence(block_ptr);
+
+  // Generating the voxel center positions
+  std::vector<Vector3f> positions_L;
+  for (int x = 0; x < TsdfBlock::kVoxelsPerSide; x++) {
+    for (int y = 0; y < TsdfBlock::kVoxelsPerSide; y++) {
+      for (int z = 0; z < TsdfBlock::kVoxelsPerSide; z++) {
+        positions_L.push_back(Index3D(x, y, z).cast<float>() +
+                              0.5 * Vector3f::Ones());
+      }
+    }
+  }
+
+  std::vector<TsdfVoxel> voxels;
+  std::vector<bool> flags;
+  tsdf_layer.getVoxels(positions_L, &voxels, &flags);
+
+  for (int i = 0; i < voxels.size(); i++) {
+    EXPECT_TRUE(flags[i]);
+
+    EXPECT_EQ(voxels[i].distance, static_cast<float>(i));
+    EXPECT_EQ(voxels[i].weight, static_cast<float>(i));
+  }
+
+  // Now try some edge cases
+
+  // Just inside the block from {0.0f, 0.0f, 0.0f}
+  constexpr float kEps = 1e-5;
+  const Vector3f kVecEps = kEps * Vector3f::Ones();
+  tsdf_layer.getVoxels({kVecEps}, &voxels, &flags);
+  EXPECT_EQ(flags.size(), 1);
+  EXPECT_EQ(voxels.size(), 1);
+  EXPECT_TRUE(flags[0]);
+  EXPECT_EQ(voxels[0].distance, 0.0f);
+  EXPECT_EQ(voxels[0].weight, 0.0f);
+
+  // Just inside the block from its far boundary {8.0f, 8.0f, 8.0f}
+  tsdf_layer.getVoxels({Vector3f(8.0f, 8.0f, 8.0f) - kVecEps}, &voxels,
+                       &flags);
+  EXPECT_TRUE(flags[0]);
+  EXPECT_EQ(voxels[0].distance, 511.0f);
+  EXPECT_EQ(voxels[0].weight, 511.0f);
+
+  // Just outside the block from {0.0f, 0.0f, 0.0f}
+  tsdf_layer.getVoxels({-kVecEps}, &voxels, &flags);
+  EXPECT_FALSE(flags[0]);
+
+  // Just outside the block from its far boundary {8.0f, 8.0f, 8.0f}
+  tsdf_layer.getVoxels({Vector3f(8.0f, 8.0f, 8.0f) + kVecEps}, &voxels,
+                       &flags);
+  EXPECT_FALSE(flags[0]);
+}
+
+TEST(LayerTest, MoveOperations) {
+  constexpr float voxel_size_m = 1.0;
+
+  // BlockLayer (MeshLayer being the representative)
+  MeshLayer mesh_layer_1(voxel_size_m, MemoryType::kDevice);
+  mesh_layer_1.allocateBlockAtIndex(Index3D(0, 0, 0));
+
+  EXPECT_TRUE(mesh_layer_1.isBlockAllocated(Index3D(0, 0, 0)));
+
+  MeshLayer mesh_layer_2 = std::move(mesh_layer_1);
+
+  EXPECT_FALSE(mesh_layer_1.isBlockAllocated(Index3D(0, 0, 0)));
+  EXPECT_TRUE(mesh_layer_2.isBlockAllocated(Index3D(0, 0, 0)));
+
+  MeshLayer mesh_layer_3(std::move(mesh_layer_2));
+
+  EXPECT_FALSE(mesh_layer_2.isBlockAllocated(Index3D(0, 0, 0)));
+  EXPECT_TRUE(mesh_layer_3.isBlockAllocated(Index3D(0, 0, 0)));
+
+  // VoxelBlockLayer (TsdfLayer being the representative)
+  TsdfLayer tsdf_layer_1(voxel_size_m, MemoryType::kDevice);
+  tsdf_layer_1.allocateBlockAtIndex(Index3D(0, 0, 0));
+
+  TsdfLayer tsdf_layer_2 = std::move(tsdf_layer_1);
+
+  EXPECT_FALSE(tsdf_layer_1.isBlockAllocated(Index3D(0, 0, 0)));
+  EXPECT_TRUE(tsdf_layer_2.isBlockAllocated(Index3D(0, 0, 0)));
+
+  TsdfLayer tsdf_layer_3(std::move(tsdf_layer_2));
+
+  EXPECT_FALSE(tsdf_layer_2.isBlockAllocated(Index3D(0, 0, 0)));
+  EXPECT_TRUE(tsdf_layer_3.isBlockAllocated(Index3D(0, 0, 0)));
+}
+
+int main(int argc, char** argv) {
+  FLAGS_alsologtostderr = true;
+  google::InitGoogleLogging(argv[0]);
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
\ No newline at end of file
diff --git a/nvblox/tests/test_mesh.cpp
b/nvblox/tests/test_mesh.cpp
new file mode 100644
index 00000000..49cca1f2
--- /dev/null
+++ b/nvblox/tests/test_mesh.cpp
@@ -0,0 +1,404 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+#include <cmath>
+
+#include "nvblox/core/cuda/warmup.h"
+#include "nvblox/core/indexing.h"
+#include "nvblox/core/layer.h"
+#include "nvblox/core/types.h"
+#include "nvblox/core/voxels.h"
+#include "nvblox/datasets/image_loader.h"
+#include "nvblox/datasets/parse_3dmatch.h"
+#include "nvblox/integrators/projective_tsdf_integrator.h"
+#include "nvblox/io/mesh_io.h"
+#include "nvblox/io/ply_writer.h"
+#include "nvblox/mesh/mesh_block.h"
+#include "nvblox/mesh/mesh_integrator.h"
+#include "nvblox/primitives/scene.h"
+
+#include "nvblox/utils/timing.h"
+
+using namespace nvblox;
+
+constexpr float kFloatEpsilon = 1e-4;
+
+class MeshTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    timing::Timing::Reset();
+    std::srand(0);
+
+    sdf_layer_.reset(new TsdfLayer(voxel_size_, MemoryType::kUnified));
+    mesh_layer_.reset(
+        new MeshLayer(sdf_layer_->block_size(), MemoryType::kUnified));
+
+    block_size_ = mesh_layer_->block_size();
+
+    // Make the scene 6x6x3 meters big.
+    scene_.aabb() = AxisAlignedBoundingBox(Vector3f(-3.0f, -3.0f, 0.0f),
+                                           Vector3f(3.0f, 3.0f, 3.0f));
+  }
+
+  float block_size_;
+  float voxel_size_ = 0.10;
+
+  TsdfLayer::Ptr sdf_layer_;
+  MeshLayer::Ptr mesh_layer_;
+
+  // The actual integrator.
+  MeshIntegrator mesh_integrator_;
+
+  // A simulation scene.
+  primitives::Scene scene_;
+};
+
+TEST_F(MeshTest, DirectionFromNeighborTest) {
+  for (int i = 0; i < 8; i++) {
+    Index3D dir = marching_cubes::directionFromNeighborIndex(i);
+    EXPECT_GE(dir.minCoeff(), 0);
+    EXPECT_LE(dir.maxCoeff(), 1);
+
+    int index = marching_cubes::neighborIndexFromDirection(dir);
+    EXPECT_EQ(i, index);
+  }
+  // Make sure 0 maps to 0.
+  EXPECT_EQ(marching_cubes::neighborIndexFromDirection(Index3D::Zero()), 0);
+}
+
+TEST_F(MeshTest, BlankMap) {
+  std::vector<Index3D> block_indices_mesh = mesh_layer_->getAllBlockIndices();
+  std::vector<Index3D> block_indices_sdf = sdf_layer_->getAllBlockIndices();
+
+  ASSERT_EQ(block_indices_mesh.size(), 0);
+  ASSERT_EQ(block_indices_sdf.size(), 0);
+
+  // Integrate a blank map.
+  EXPECT_TRUE(mesh_integrator_.integrateMeshFromDistanceField(
+      *sdf_layer_, mesh_layer_.get()));
+
+  // Should still be 0.
+  block_indices_mesh = mesh_layer_->getAllBlockIndices();
+  ASSERT_EQ(block_indices_mesh.size(), 0);
+}
+
+TEST_F(MeshTest, PlaneMesh) {
+  // Create a scene that's just a plane.
+  // Plane at the origin pointing in the -x direction.
+  scene_.addPrimitive(std::make_unique<primitives::Plane>(
+      Vector3f(0.0, 0.0, 0.0), Vector3f(-1, 0, 0)));
+
+  scene_.generateSdfFromScene(4 * voxel_size_, sdf_layer_.get());
+
+  EXPECT_TRUE(mesh_integrator_.integrateMeshFromDistanceField(
+      *sdf_layer_, mesh_layer_.get(), DeviceType::kCPU));
+
+  std::vector<Index3D> block_indices_mesh = mesh_layer_->getAllBlockIndices();
+  std::vector<Index3D> block_indices_sdf = sdf_layer_->getAllBlockIndices();
+
+  EXPECT_GT(block_indices_sdf.size(), 0);
+  EXPECT_LE(block_indices_mesh.size(), block_indices_sdf.size());
+  EXPECT_GT(block_indices_mesh.size(), 0);
+
+  // Make sure there are no empty mesh blocks.
+  for (const Index3D& block_index : block_indices_mesh) {
+    MeshBlock::ConstPtr mesh_block = mesh_layer_->getBlockAtIndex(block_index);
+
+    EXPECT_GT(mesh_block->vertices.size(), 0);
+    EXPECT_GT(mesh_block->normals.size(), 0);
+    EXPECT_GT(mesh_block->triangles.size(), 0);
+
+    // Make sure everything is the same size.
+    EXPECT_EQ(mesh_block->vertices.size(), mesh_block->normals.size());
+    EXPECT_EQ(mesh_block->vertices.size(), mesh_block->triangles.size());
+
+    std::vector<Vector3f> vertices = mesh_block->getVertexVectorOnCPU();
+    std::vector<Vector3f> normals = mesh_block->getNormalVectorOnCPU();
+
+    // Make sure that the actual points are correct.
+    for (size_t i = 0; i < vertices.size(); i++) {
+      const Vector3f& vertex = vertices[i];
+      const Vector3f& normal = normals[i];
+
+      // Make sure the points on the plane are correct.
+      EXPECT_NEAR(vertex.x(), 0.0, kFloatEpsilon);
+      EXPECT_NEAR(normal.x(), -1.0, kFloatEpsilon);
+      EXPECT_NEAR(normal.y(), 0.0, kFloatEpsilon);
+      EXPECT_NEAR(normal.z(), 0.0, kFloatEpsilon);
+    }
+  }
+
+  io::outputMeshLayerToPly(*mesh_layer_, "test_mesh_cpu.ply");
+
+  std::cout << timing::Timing::Print();
+}
+
+TEST_F(MeshTest, ComplexScene) {
+  scene_.addPrimitive(std::make_unique<primitives::Plane>(
+      Vector3f(0.0, 0.0, 0.0), Vector3f(-1, 0, 0)));
+
+  scene_.addPrimitive(std::make_unique<primitives::Plane>(
+      Vector3f(2.1, 0.1, 0.1), Vector3f(0, -1, 0)));
+
+  scene_.addPrimitive(
+      std::make_unique<primitives::Sphere>(Vector3f(-2, -2, 0), 2.0));
+
+  scene_.generateSdfFromScene(4 * voxel_size_, sdf_layer_.get());
+
+  EXPECT_TRUE(mesh_integrator_.integrateMeshFromDistanceField(
+      *sdf_layer_, mesh_layer_.get(), DeviceType::kCPU));
+
+  std::vector<Index3D> block_indices_mesh = mesh_layer_->getAllBlockIndices();
+  EXPECT_GT(block_indices_mesh.size(), 0);
+
+  io::outputMeshLayerToPly(*mesh_layer_, "test_mesh_complex.ply");
+  std::cout << timing::Timing::Print();
+}
+
+TEST_F(MeshTest, GPUPlaneTest) {
+  // Create a scene that's just a plane.
+  // Plane at the origin pointing in the -x direction.
+  scene_.addPrimitive(std::make_unique<primitives::Plane>(
+      Vector3f(0.00, 0.0, 0.0), Vector3f(-1, 0, 0)));
+
+  scene_.generateSdfFromScene(4 * voxel_size_, sdf_layer_.get());
+
+  // Create a second mesh layer for the CPU.
+  BlockLayer<MeshBlock> cpu_mesh(block_size_, MemoryType::kUnified);
+  EXPECT_TRUE(mesh_integrator_.integrateMeshFromDistanceField(
+      *sdf_layer_, &cpu_mesh, DeviceType::kCPU));
+
+  // Integrate from GPU.
+  EXPECT_TRUE(mesh_integrator_.integrateMeshFromDistanceField(
+      *sdf_layer_, mesh_layer_.get(), DeviceType::kGPU));
+
+  std::vector<Index3D> block_indices_mesh = mesh_layer_->getAllBlockIndices();
+  std::vector<Index3D> block_indices_sdf = sdf_layer_->getAllBlockIndices();
+  std::vector<Index3D> block_indices_mesh_cpu = cpu_mesh.getAllBlockIndices();
+
+  EXPECT_GT(block_indices_sdf.size(), 0);
+  EXPECT_LE(block_indices_mesh.size(), block_indices_sdf.size());
+  EXPECT_GT(block_indices_mesh.size(), 0);
+  EXPECT_GE(block_indices_mesh.size(), block_indices_mesh_cpu.size());
+
+  // With the GPU integration we might actually have empty mesh blocks.
+  for (const Index3D& block_index : block_indices_mesh) {
+    MeshBlock::ConstPtr mesh_block = mesh_layer_->getBlockAtIndex(block_index);
+    MeshBlock::ConstPtr mesh_block_cpu = cpu_mesh.getBlockAtIndex(block_index);
+    if (mesh_block_cpu == nullptr) {
+      EXPECT_EQ(mesh_block->vertices.size(), 0);
+      EXPECT_EQ(mesh_block->normals.size(), 0);
+      EXPECT_EQ(mesh_block->triangles.size(), 0);
+    }
+
+    // Make sure everything is the same size.
+    EXPECT_EQ(mesh_block->vertices.size(), mesh_block->normals.size());
+    EXPECT_EQ(mesh_block->vertices.size(), mesh_block->triangles.size());
+
+    // Make sure we have the same size as on the CPU as well.
+    if (mesh_block_cpu != nullptr) {
+      EXPECT_EQ(mesh_block->vertices.size(), mesh_block_cpu->vertices.size());
+    }
+    std::vector<Vector3f> vertices = mesh_block->getVertexVectorOnCPU();
+    std::vector<Vector3f> normals = mesh_block->getNormalVectorOnCPU();
+
+    // Make sure that the actual points are correct.
+    for (size_t i = 0; i < vertices.size(); i++) {
+      const Vector3f& vertex = vertices[i];
+      const Vector3f& normal = normals[i];
+
+      // Make sure the points on the plane are correct.
+      EXPECT_NEAR(vertex.x(), 0.0, kFloatEpsilon);
+      EXPECT_NEAR(normal.x(), -1.0, kFloatEpsilon);
+      EXPECT_NEAR(normal.y(), 0.0, kFloatEpsilon);
+      EXPECT_NEAR(normal.z(), 0.0, kFloatEpsilon);
+    }
+  }
+  io::outputMeshLayerToPly(*mesh_layer_, "test_mesh_gpu.ply");
+
+  std::cout << timing::Timing::Print();
+}
+
+TEST_F(MeshTest, IncrementalMesh) {
+  constexpr float kSphereRadius = 2.0f;
+  constexpr float kTrajectoryRadius = 4.0f;
+  constexpr float kTrajectoryHeight = 2.0f;
+  constexpr int kNumTrajectoryPoints = 80;
+
+  // Maximum distance to consider for scene generation.
+  constexpr float kMaxDist = 10.0;
+  constexpr float kMinWeight = 2.0;
+
+  mesh_integrator_.min_weight() = kMinWeight;
+
+  // Create a camera.
+  Camera camera(300, 300, 320, 240, 640, 480);
+
+  // Scene is bounded to -5, -5, 0 to 5, 5, 5.
+  scene_.aabb() = AxisAlignedBoundingBox(Vector3f(-5.0f, -5.0f, 0.0f),
+                                         Vector3f(5.0f, 5.0f, 5.0f));
+  // Create a scene with a ground plane, ceiling, and a sphere.
+  scene_.addGroundLevel(0.0f);
+  scene_.addCeiling(5.0f);
+  scene_.addPrimitive(
+      std::make_unique<primitives::Sphere>(Vector3f(0.0f, 0.0f, 2.0f), 2.0f));
+  // Add bounding planes at 5 meters. Basically makes it a sphere in a box.
+  scene_.addPlaneBoundaries(-5.0f, 5.0f, -5.0f, 5.0f);
+
+  // Create an integrator.
+  ProjectiveTsdfIntegrator integrator;
+  integrator.max_integration_distance_m(10.0);
+
+  // Simulate a trajectory with the requisite number of points, on a circle
+  // around the sphere.
+  const float radians_increment = 2 * M_PI / (kNumTrajectoryPoints);
+
+  // Create a depth frame. We share this memory buffer for the entire
+  // trajectory.
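+  // (Reusing one unified-memory image avoids a device allocation and free on
+  // every iteration of the loop below.)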
+  DepthImage depth_frame(camera.height(), camera.width(),
+                         MemoryType::kUnified);
+
+  for (size_t i = 0; i < kNumTrajectoryPoints; i++) {
+    const float theta = radians_increment * i;
+    // Convert polar to cartesian coordinates.
+    Vector3f cartesian_coordinates(kTrajectoryRadius * std::cos(theta),
+                                   kTrajectoryRadius * std::sin(theta),
+                                   kTrajectoryHeight);
+    // The camera has its z axis pointing towards the origin.
+    Eigen::Quaternionf rotation_base(0.5, 0.5, 0.5, 0.5);
+    Eigen::Quaternionf rotation_theta(
+        Eigen::AngleAxisf(M_PI + theta, Vector3f::UnitZ()));
+
+    // Construct a transform from camera to scene with this.
+    Transform T_S_C = Transform::Identity();
+    T_S_C.prerotate(rotation_theta * rotation_base);
+    T_S_C.pretranslate(cartesian_coordinates);
+
+    // Generate a depth image of the scene.
+    scene_.generateDepthImageFromScene(camera, T_S_C, kMaxDist, &depth_frame);
+
+    // Integrate this depth image.
+    std::vector<Index3D> updated_blocks;
+
+    integrator.integrateFrame(depth_frame, T_S_C, camera, sdf_layer_.get(),
+                              &updated_blocks);
+
+    // Integrate the mesh.
+    mesh_integrator_.integrateBlocksGPU(*sdf_layer_, updated_blocks,
+                                        mesh_layer_.get());
+  }
+
+  // Create a comparison mesh.
+  BlockLayer<MeshBlock>::Ptr batch_mesh_layer(
+      new BlockLayer<MeshBlock>(block_size_, MemoryType::kUnified));
+
+  // Compute the batch mesh.
+  mesh_integrator_.integrateMeshFromDistanceField(*sdf_layer_,
+                                                  batch_mesh_layer.get());
+
+  io::outputMeshLayerToPly(*mesh_layer_, "test_mesh_inc.ply");
+  io::outputMeshLayerToPly(*batch_mesh_layer, "test_mesh_batch.ply");
+
+  // For each block in the batch mesh, make sure we have roughly the same
+  // number of points in the incremental mesh.
+  std::vector<Index3D> all_blocks = batch_mesh_layer->getAllBlockIndices();
+
+  for (const Index3D& block : all_blocks) {
+    MeshBlock::ConstPtr batch_block = batch_mesh_layer->getBlockAtIndex(block);
+    MeshBlock::ConstPtr inc_block = mesh_layer_->getBlockAtIndex(block);
+
+    ASSERT_NE(inc_block.get(), nullptr);
+    EXPECT_EQ(batch_block->vertices.size(), inc_block->vertices.size());
+  }
+
+  std::cout << timing::Timing::Print();
+}
+
+TEST_F(MeshTest, RepeatabilityTest) {
+  const std::string base_path = "../tests/data/3dmatch";
+  constexpr int seq_id = 1;
+  DepthImage depth_image;
+  ColorImage color_image;
+  EXPECT_TRUE(datasets::load16BitDepthImage(
+      datasets::threedmatch::getPathForDepthImage(base_path, seq_id, 0),
+      &depth_image));
+  EXPECT_TRUE(datasets::load8BitColorImage(
+      datasets::threedmatch::getPathForColorImage(base_path, seq_id, 0),
+      &color_image));
+  EXPECT_EQ(depth_image.width(), color_image.width());
+  EXPECT_EQ(depth_image.height(), color_image.height());
+
+  // Parse 3x3 camera intrinsics matrix from 3D Match format: space-separated.
+  Eigen::Matrix3f camera_intrinsic_matrix;
+  EXPECT_TRUE(datasets::threedmatch::parseCameraFromFile(
+      datasets::threedmatch::getPathForCameraIntrinsics(base_path),
+      &camera_intrinsic_matrix));
+  const auto camera = Camera::fromIntrinsicsMatrix(
+      camera_intrinsic_matrix, depth_image.width(), depth_image.height());
+
+  // Integrate depth
+  sdf_layer_ = std::make_shared<TsdfLayer>(voxel_size_, MemoryType::kUnified);
+  ProjectiveTsdfIntegrator tsdf_integrator;
+  tsdf_integrator.integrateFrame(depth_image, Transform::Identity(), camera,
+                                 sdf_layer_.get());
+
+  // Generate the mesh (twice)
+  MeshLayer mesh_layer_1(block_size_, MemoryType::kUnified);
+  MeshLayer mesh_layer_2(block_size_, MemoryType::kUnified);
+  EXPECT_TRUE(mesh_integrator_.integrateMeshFromDistanceField(*sdf_layer_,
+                                                              &mesh_layer_1));
+  EXPECT_TRUE(mesh_integrator_.integrateMeshFromDistanceField(*sdf_layer_,
+                                                              &mesh_layer_2));
+
+  // Check that the generated vertices are the same.
+  auto block_indices_1 = mesh_layer_1.getAllBlockIndices();
+  auto block_indices_2 = mesh_layer_2.getAllBlockIndices();
+  EXPECT_EQ(block_indices_1.size(), block_indices_2.size());
+  for (int block_idx = 0; block_idx < block_indices_1.size(); block_idx++) {
+    const Index3D& idx_1 = block_indices_1[block_idx];
+    const Index3D& idx_2 = block_indices_2[block_idx];
+    EXPECT_TRUE((idx_1.array() == idx_2.array()).all());
+    MeshBlock::ConstPtr block_1 = mesh_layer_1.getBlockAtIndex(idx_1);
+    MeshBlock::ConstPtr block_2 = mesh_layer_2.getBlockAtIndex(idx_2);
+    EXPECT_EQ(block_1->vertices.size(), block_2->vertices.size());
+
+    auto threed_less = [](const Vector3f& p_1, const Vector3f& p_2) -> bool {
+      if (p_1.x() != p_2.x()) {
+        return p_1.x() < p_2.x();
+      }
+      if (p_1.y() != p_2.y()) {
+        return p_1.y() < p_2.y();
+      }
+      return p_1.z() < p_2.z();
+    };
+    auto vertex_vector_1 = block_1->getVertexVectorOnCPU();
+    auto vertex_vector_2 = block_2->getVertexVectorOnCPU();
+    std::sort(vertex_vector_1.begin(), vertex_vector_1.end(), threed_less);
+    std::sort(vertex_vector_2.begin(), vertex_vector_2.end(), threed_less);
+    for (int i = 0; i < vertex_vector_1.size(); i++) {
+      EXPECT_TRUE(
+          (vertex_vector_1[i].array() == vertex_vector_2[i].array()).all());
+    }
+  }
+}
+
+int main(int argc, char** argv) {
+  warmupCuda();
+  google::InitGoogleLogging(argv[0]);
+  FLAGS_alsologtostderr = true;
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/nvblox/tests/test_mesh_coloring.cpp b/nvblox/tests/test_mesh_coloring.cpp
new file mode 100644
index 00000000..4bfbfd44
--- /dev/null
+++ b/nvblox/tests/test_mesh_coloring.cpp
@@ -0,0 +1,239 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+
+#include "nvblox/core/accessors.h"
+#include "nvblox/core/common_names.h"
+#include "nvblox/core/layer.h"
+#include "nvblox/core/voxels.h"
+#include "nvblox/datasets/image_loader.h"
+#include "nvblox/datasets/parse_3dmatch.h"
+#include "nvblox/integrators/projective_color_integrator.h"
+#include "nvblox/integrators/projective_tsdf_integrator.h"
+#include "nvblox/io/mesh_io.h"
+#include "nvblox/mesh/mesh_integrator.h"
+#include "nvblox/primitives/scene.h"
+
+using namespace nvblox;
+
+TEST(MeshColoringTests, UniformColorSphere) {
+  // The uniform color
+  const Color kTestColor = Color::Purple();
+
+  // Layer params
+  constexpr float voxel_size_m = 0.1;
+  constexpr float block_size_m =
+      VoxelBlock<TsdfVoxel>::kVoxelsPerSide * voxel_size_m;
+
+  // TSDF layer
+  TsdfLayer tsdf_layer(voxel_size_m, MemoryType::kUnified);
+
+  // Build the test scene
+  constexpr float kSphereRadius = 2.0f;
+  constexpr float kTruncationDistanceVox = 2;
+  constexpr float kTruncationDistanceMeters =
+      kTruncationDistanceVox * voxel_size_m;
+
+  // Maximum distance to consider for scene generation.
+  constexpr float kMaxDist = 10.0;
+  constexpr float kMinWeight = 1.0;
+
+  // Tolerance for error.
+  constexpr float kDistanceErrorTolerance = kTruncationDistanceMeters;
+
+  // Scene is bounded to -5, -5, 0 to 5, 5, 5.
+  primitives::Scene scene;
+  scene.aabb() = AxisAlignedBoundingBox(Vector3f(-5.0f, -5.0f, 0.0f),
+                                        Vector3f(5.0f, 5.0f, 5.0f));
+  // Create a scene with a ground plane and a sphere.
+  scene.addGroundLevel(0.0f);
+  scene.addCeiling(5.0f);
+  scene.addPrimitive(
+      std::make_unique<primitives::Sphere>(Vector3f(0.0f, 0.0f, 2.0f), 2.0f));
+  // Add bounding planes at 5 meters. Basically makes it a sphere in a box.
+  scene.addPlaneBoundaries(-5.0f, 5.0f, -5.0f, 5.0f);
+
+  // Get the ground truth SDF for it.
+ scene.generateSdfFromScene(kTruncationDistanceMeters, &tsdf_layer); + + // Make a ColorLayer with a solid color + ColorLayer color_layer(voxel_size_m, MemoryType::kUnified); + for (const Index3D& block_idx : tsdf_layer.getAllBlockIndices()) { + ColorBlock::Ptr color_block = color_layer.allocateBlockAtIndex(block_idx); + callFunctionOnAllVoxels( + color_block.get(), + [&kTestColor](const Index3D& voxel_index, ColorVoxel* voxel) { + voxel->color = kTestColor; + }); + } + + // Generate a mesh from the "reconstruction" + MeshIntegrator mesh_integrator; + MeshLayer mesh_layer(block_size_m, MemoryType::kUnified); + EXPECT_TRUE( + mesh_integrator.integrateMeshFromDistanceField(tsdf_layer, &mesh_layer)); + mesh_integrator.colorMesh(color_layer, &mesh_layer); + + // Check that all the mesh points are correctly colored + callFunctionOnAllBlocks( + mesh_layer, + [&kTestColor](const Index3D& block_index, const MeshBlock* mesh_block) { + EXPECT_EQ(mesh_block->vertices.size(), mesh_block->colors.size()); + for (const Color& color : mesh_block->colors) { + EXPECT_EQ(color, kTestColor); + } + }); +} + +TEST(MeshColoringTests, CPUvsGPUon3DMatch) { + // Load 3dmatch image + const std::string base_path = "../tests/data/3dmatch"; + constexpr int seq_id = 1; + DepthImage depth_image_1; + ColorImage color_image_1; + EXPECT_TRUE(datasets::load16BitDepthImage( + datasets::threedmatch::getPathForDepthImage(base_path, seq_id, 0), + &depth_image_1)); + EXPECT_TRUE(datasets::load8BitColorImage( + datasets::threedmatch::getPathForColorImage(base_path, seq_id, 0), + &color_image_1)); + EXPECT_EQ(depth_image_1.width(), color_image_1.width()); + EXPECT_EQ(depth_image_1.height(), color_image_1.height()); + + // Parse 3x3 camera intrinsics matrix from 3D Match format: space-separated. 
+  Eigen::Matrix3f camera_intrinsic_matrix;
+  EXPECT_TRUE(datasets::threedmatch::parseCameraFromFile(
+      datasets::threedmatch::getPathForCameraIntrinsics(base_path),
+      &camera_intrinsic_matrix));
+  const auto camera = Camera::fromIntrinsicsMatrix(
+      camera_intrinsic_matrix, depth_image_1.width(), depth_image_1.height());
+
+  // Integrate depth
+  constexpr float kVoxelSizeM = 0.05f;
+  const float kBlockSizeM =
+      VoxelBlock<TsdfVoxel>::kVoxelsPerSide * kVoxelSizeM;
+  ProjectiveTsdfIntegrator tsdf_integrator;
+  TsdfLayer tsdf_layer(kVoxelSizeM, MemoryType::kUnified);
+  tsdf_integrator.integrateFrame(depth_image_1, Transform::Identity(), camera,
+                                 &tsdf_layer);
+
+  // Integrate Color (GPU)
+  ProjectiveColorIntegrator color_integrator;
+  ColorLayer color_layer(kVoxelSizeM, MemoryType::kUnified);
+  color_integrator.integrateFrame(color_image_1, Transform::Identity(), camera,
+                                  tsdf_layer, &color_layer);
+
+  // Generate a mesh from the "reconstruction"
+  MeshIntegrator mesh_integrator;
+  MeshLayer mesh_layer_colored_on_gpu(kBlockSizeM, MemoryType::kUnified);
+  EXPECT_TRUE(mesh_integrator.integrateMeshFromDistanceField(
+      tsdf_layer, &mesh_layer_colored_on_gpu));
+
+  // Copy the mesh
+  MeshLayer mesh_layer_colored_on_cpu(kBlockSizeM, MemoryType::kUnified);
+  for (const Index3D& block_idx :
+       mesh_layer_colored_on_gpu.getAllBlockIndices()) {
+    MeshBlock::ConstPtr mesh_block =
+        mesh_layer_colored_on_gpu.getBlockAtIndex(block_idx);
+    MeshBlock::Ptr mesh_block_copy =
+        mesh_layer_colored_on_cpu.allocateBlockAtIndex(block_idx);
+    mesh_block_copy->vertices =
+        unified_vector<Vector3f>(mesh_block->vertices);
+    mesh_block_copy->normals = unified_vector<Vector3f>(mesh_block->normals);
+  }
+
+  // Color on GPU and CPU
+  mesh_integrator.colorMeshGPU(color_layer, &mesh_layer_colored_on_gpu);
+  mesh_integrator.colorMeshCPU(color_layer, &mesh_layer_colored_on_cpu);
+
+  // Compare colors between the two implementations
+  int num_same = 0;
+  int num_diff = 0;
+  int num_diff_outside = 0;
+  int total_vertices = 0;
+
+  auto block_indices_gpu = mesh_layer_colored_on_gpu.getAllBlockIndices();
+  auto block_indices_cpu = mesh_layer_colored_on_cpu.getAllBlockIndices();
+  EXPECT_EQ(block_indices_gpu.size(), block_indices_cpu.size());
+  for (int idx = 0; idx < block_indices_gpu.size(); idx++) {
+    const Index3D& block_idx = block_indices_gpu[idx];
+
+    MeshBlock::ConstPtr block_gpu =
+        mesh_layer_colored_on_gpu.getBlockAtIndex(block_idx);
+    MeshBlock::ConstPtr block_cpu =
+        mesh_layer_colored_on_cpu.getBlockAtIndex(block_idx);
+    CHECK(block_gpu);
+    CHECK(block_cpu);
+
+    EXPECT_EQ(block_gpu->vertices.size(), block_cpu->vertices.size());
+    EXPECT_EQ(block_gpu->colors.size(), block_cpu->colors.size());
+    EXPECT_EQ(block_gpu->vertices.size(), block_gpu->colors.size());
+    for (int i = 0; i < block_gpu->colors.size(); i++) {
+      EXPECT_TRUE(
+          (block_gpu->vertices[i].array() == block_cpu->vertices[i].array())
+              .all());
+      if (block_gpu->colors[i] == block_cpu->colors[i]) {
+        num_same++;
+      } else {
+        num_diff++;
+        // OK so we have different colors at this vertex. This CAN occur
+        // because of vertices that leave block boundaries (for speed we only
+        // take the closest color voxel in the corresponding block).
+        // Let's check that this is indeed the case here.
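+        // (A vertex whose block-frame position exceeds kVoxelsPerSide voxels
+        // lies outside this block, so its nearest color voxel legitimately
+        // differs between the two implementations.)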
+
+        // Calculate the position of this vertex in the block
+        const Vector3f p_V_B_m =
+            block_cpu->vertices[i] -
+            getPositionFromBlockIndex(kBlockSizeM, block_idx);
+        const Vector3f p_V_B_vox = p_V_B_m / kVoxelSizeM;
+        if ((p_V_B_vox.array() > VoxelBlock<TsdfVoxel>::kVoxelsPerSide)
+                .any()) {
+          num_diff_outside++;
+        }
+      }
+    }
+    total_vertices += block_cpu->vertices.size();
+  }
+
+  // OK so there are a few vertices which get different colors and are WITHIN
+  // block boundaries. From looking at them, this is because they sit exactly
+  // on voxel boundaries and presumably are rounded differently in the CPU and
+  // GPU implementations. Let's just check that the number of such occurrences
+  // is exceedingly small.
+  constexpr float kAllowablePercentageDifferingColorWithinBlock = 0.1f;  // 0.1%
+  const float percentage_different =
+      100.0f * (num_diff - num_diff_outside) / total_vertices;
+  EXPECT_LT(percentage_different,
+            kAllowablePercentageDifferingColorWithinBlock);
+
+  std::cout << "Number of vertices assigned the SAME color between CPU and GPU "
+               "implementations: "
+            << num_same << std::endl;
+  std::cout << "Number of vertices assigned DIFFERING color between CPU and "
+               "GPU implementations: "
+            << num_diff << std::endl;
+  std::cout << "of these, number with vertices outside block boundaries: "
+            << num_diff_outside << std::endl;
+  std::cout
+      << "The percentage of vertices with different colors within blocks: "
+      << percentage_different << "%" << std::endl;
+}
+
+int main(int argc, char** argv) {
+  FLAGS_alsologtostderr = true;
+  google::InitGoogleLogging(argv[0]);
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
\ No newline at end of file
diff --git a/nvblox/tests/test_nvtx_ranges.cpp b/nvblox/tests/test_nvtx_ranges.cpp
new file mode 100644
index 00000000..266d7bd4
--- /dev/null
+++ b/nvblox/tests/test_nvtx_ranges.cpp
@@ -0,0 +1,75 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+
+#include "nvblox/core/unified_vector.h"
+#include "nvblox/utils/nvtx_ranges.h"
+#include "nvblox/utils/timing.h"
+
+#include "nvblox/tests/test_utils_cuda.h"
+
+using namespace nvblox;
+
+// NOTE(alexmillane): This file doesn't really test NvtxRange per se. The
+// intent is that this test generates an nsight systems trace for inspection.
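+
+// NOTE(editor): For orientation, an RAII NVTX range of this kind is typically
+// a thin wrapper over the NVTX C API (nvtxRangePushA / nvtxRangePop), which
+// ships with the CUDA toolkit. A minimal sketch is shown below; the class
+// name ScopedNvtxRangeSketch is hypothetical and this is illustrative only,
+// not nvblox's actual NvtxRange implementation:
+//
+//   #include <nvToolsExt.h>
+//
+//   class ScopedNvtxRangeSketch {
+//    public:
+//     // Pushes a named range onto the NVTX stack for the current thread.
+//     explicit ScopedNvtxRangeSketch(const char* name) {
+//       nvtxRangePushA(name);
+//     }
+//     // Pops the range; the span between push and pop shows up as a labeled
+//     // bar on the Nsight Systems timeline (e.g. under
+//     // `nsys profile ./test_nvtx_ranges`).
+//     ~ScopedNvtxRangeSketch() { nvtxRangePop(); }
+//   };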
+
+TEST(NvtxTest, Ranges) {
+  timing::NvtxRange whole_test_nvtx_range("whole_test", Color::Orange());
+
+  constexpr int kSize = 1e6;
+  constexpr int kInitialValue = 0;
+  unified_vector<int> vec(kSize, kInitialValue);
+
+  constexpr int kNumAdds = 10;
+  for (int i = 0; i < kNumAdds; i++) {
+    timing::NvtxRange nvtx_range("add one idx: " + std::to_string(i),
+                                 Color::Red());
+    test_utils::addOneToAllGPU(&vec);
+  }
+
+  timing::NvtxRange nvtx_range("check", Color::Green());
+  EXPECT_TRUE(
+      test_utils::checkVectorAllConstant(vec, kInitialValue + kNumAdds));
+  nvtx_range.Stop();
+}
+
+TEST(NvtxTest, TimerNvtx) {
+  constexpr int kSize = 1e6;
+  constexpr int kInitialValue = 0;
+  unified_vector<int> vec(kSize, kInitialValue);
+
+  constexpr int kNumAdds = 10;
+  for (int i = 0; i < kNumAdds; i++) {
+    timing::mark("add:" + std::to_string(i), Color::Red());
+    timing::TimerNvtx timer("add_one");
+    test_utils::addOneToAllGPU(&vec);
+  }
+
+  timing::TimerNvtx timer("check", Color::Green());
+  EXPECT_TRUE(
+      test_utils::checkVectorAllConstant(vec, kInitialValue + kNumAdds));
+  timer.Stop();
+
+  std::cout << timing::Timing::Print() << std::endl;
+}
+
+int main(int argc, char** argv) {
+  google::InitGoogleLogging(argv[0]);
+  FLAGS_alsologtostderr = true;
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/nvblox/tests/test_ray_caster.cpp b/nvblox/tests/test_ray_caster.cpp
new file mode 100644
index 00000000..d18f2135
--- /dev/null
+++ b/nvblox/tests/test_ray_caster.cpp
@@ -0,0 +1,117 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+#include <vector>
+
+#include "nvblox/core/cuda/warmup.h"
+#include "nvblox/integrators/ray_caster.h"
+#include "nvblox/utils/timing.h"
+
+using namespace nvblox;
+
+class RayCasterTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    timing::Timing::Reset();
+    std::srand(0);
+  }
+
+  static constexpr float kFloatEpsilon = 1e-4;
+};
+
+TEST_F(RayCasterTest, StraightAheadCast) {
+  Vector3f start(0.0f, 0.0f, 0.0f);
+  Vector3f end(5.0f, 0.0f, 0.0f);
+  RayCaster raycaster(start, end);
+
+  std::vector<Index3D> all_indices;
+  raycaster.getAllIndices(&all_indices);
+
+  EXPECT_EQ(all_indices.size(), 6);
+
+  // Next make a scaled raycaster.
+  float scale = 2.0f;
+  RayCaster scaled_raycaster(scale * start, scale * end, scale);
+  std::vector<Index3D> all_scaled_indices;
+  scaled_raycaster.getAllIndices(&all_scaled_indices);
+
+  ASSERT_EQ(all_indices.size(), all_scaled_indices.size());
+
+  // Finally, negative.
+  RayCaster negative_raycaster(-start, -end);
+  std::vector<Index3D> all_negative_indices;
+  negative_raycaster.getAllIndices(&all_negative_indices);
+
+  ASSERT_EQ(all_indices.size(), all_negative_indices.size());
+
+  for (size_t i = 0; i < all_indices.size(); i++) {
+    EXPECT_EQ(all_indices[i], all_scaled_indices[i]);
+    EXPECT_EQ(all_indices[i], -all_negative_indices[i]);
+    EXPECT_EQ(all_indices[i].y(), 0);
+    EXPECT_EQ(all_indices[i].z(), 0);
+  }
+}
+
+TEST_F(RayCasterTest, ObliqueCast) {
+  Vector3f start(0.5f, -1.1f, 3.1f);
+  Vector3f end(5.1f, 0.2f, 2.1f);
+  RayCaster raycaster(start, end);
+
+  std::vector<Index3D> all_indices;
+  raycaster.getAllIndices(&all_indices);
+
+  // Next make a scaled raycaster.
+  float scale = 2.0f;
+  RayCaster scaled_raycaster(scale * start, scale * end, scale);
+  std::vector<Index3D> all_scaled_indices;
+  scaled_raycaster.getAllIndices(&all_scaled_indices);
+
+  ASSERT_EQ(all_indices.size(), all_scaled_indices.size());
+
+  // Finally, backwards.
+  RayCaster backwards_raycaster(end, start);
+  std::vector<Index3D> all_backwards_indices;
+  backwards_raycaster.getAllIndices(&all_backwards_indices);
+
+  ASSERT_EQ(all_indices.size(), all_backwards_indices.size());
+
+  for (size_t i = 0; i < all_indices.size(); i++) {
+    EXPECT_EQ(all_indices[i], all_scaled_indices[i]);
+    EXPECT_EQ(all_indices[i],
+              all_backwards_indices[all_indices.size() - i - 1]);
+  }
+}
+
+TEST_F(RayCasterTest, Length0Cast) {
+  Vector3f start(0.0f, 0.0f, 0.0f);
+  Vector3f end = start;
+  RayCaster raycaster(start, end);
+
+  std::vector<Index3D> all_indices;
+  raycaster.getAllIndices(&all_indices);
+
+  EXPECT_EQ(all_indices.size(), 1);
+  EXPECT_EQ(all_indices[0], Index3D::Zero());
+}
+
+int main(int argc, char** argv) {
+  warmupCuda();
+  google::InitGoogleLogging(argv[0]);
+  FLAGS_alsologtostderr = true;
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
\ No newline at end of file
diff --git a/nvblox/tests/test_scene.cpp b/nvblox/tests/test_scene.cpp
new file mode 100644
index 00000000..92d54b77
--- /dev/null
+++ b/nvblox/tests/test_scene.cpp
@@ -0,0 +1,150 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+#include <memory>
+
+#include "nvblox/io/csv.h"
+#include "nvblox/primitives/scene.h"
+
+using namespace nvblox;
+
+constexpr float kFloatEpsilon = 1e-4;
+
+class SceneTest : public ::testing::Test {
+ protected:
+  SceneTest() : camera_(Camera(fu_, fv_, cu_, cv_, width_, height_)) {}
+
+  void SetUp() override {
+    // Make the scene 6x6x3 meters big.
+    scene_.aabb() = AxisAlignedBoundingBox(Vector3f(-3.0f, -3.0f, 0.0f),
+                                           Vector3f(3.0f, 3.0f, 3.0f));
+  }
+
+  // A simulation scene.
+  primitives::Scene scene_;
+
+  // Camera parameters.
+  constexpr static float fu_ = 300;
+  constexpr static float fv_ = 300;
+  constexpr static int width_ = 640;
+  constexpr static int height_ = 480;
+  constexpr static float cu_ = static_cast<float>(width_) / 2.0f;
+  constexpr static float cv_ = static_cast<float>(height_) / 2.0f;
+  Camera camera_;
+};
+
+TEST_F(SceneTest, BlankMap) {
+  constexpr float max_dist = 1.0;
+
+  // Ensure that we don't get any distances back.
+  Vector3f point = Vector3f::Zero();
+
+  float dist = scene_.getSignedDistanceToPoint(point, max_dist);
+
+  EXPECT_NEAR(dist, max_dist, kFloatEpsilon);
+
+  // Ensure that we get a blank image.
+  DepthImage depth_frame(camera_.height(), camera_.width(),
+                         MemoryType::kUnified);
+  Transform T_S_C = Transform::Identity();
+  scene_.generateDepthImageFromScene(camera_, T_S_C, max_dist, &depth_frame);
+
+  // Check all the pixels.
+  for (int lin_idx = 0; lin_idx < depth_frame.numel(); lin_idx++) {
+    EXPECT_NEAR(depth_frame(lin_idx), 0.0f, kFloatEpsilon);
+  }
+}
+
+TEST_F(SceneTest, PlaneScene) {
+  constexpr float max_dist = 2.0;
+
+  // Create a scene that's just a plane.
+  // Plane at 1.0 in the positive z direction, pointing in -z.
+  scene_.addPrimitive(std::make_unique<primitives::Plane>(
+      Vector3f(0.0f, 0.0, 1.0), Vector3f(0, 0, -1)));
+
+  // Get a camera pointing to this plane from the origin.
+  Transform T_S_C = Transform::Identity();
+
+  DepthImage depth_frame(camera_.height(), camera_.width(),
+                         MemoryType::kUnified);
+  scene_.generateDepthImageFromScene(camera_, T_S_C, max_dist, &depth_frame);
+
+  // Check all the pixels.
+  for (int lin_idx = 0; lin_idx < depth_frame.numel(); lin_idx++) {
+    EXPECT_NEAR(depth_frame(lin_idx), 1.0f, kFloatEpsilon);
+  }
+}
+
+TEST_F(SceneTest, PlaneSceneVertical) {
+  constexpr float max_dist = 2.0;
+
+  // Create a scene that's just a plane.
+  // Plane at 1.0 in the positive x direction, pointing in -x.
+  scene_.addPrimitive(std::make_unique<primitives::Plane>(
+      Vector3f(1.0f, 0.0, 0.0), Vector3f(-1, 0, 0)));
+
+  // Get a camera pointing to this plane from the origin.
+  Eigen::Quaternionf rotation =
+      Eigen::Quaternionf::FromTwoVectors(Vector3f(0, 0, 1), Vector3f(1, 0, 0));
+
+  Transform T_S_C = Transform::Identity();
+  T_S_C.prerotate(rotation);
+
+  DepthImage depth_frame(camera_.height(), camera_.width(),
+                         MemoryType::kUnified);
+  scene_.generateDepthImageFromScene(camera_, T_S_C, max_dist, &depth_frame);
+  io::writeToCsv("test_plane_scene_vertical.csv", depth_frame);
+
+  // Check all the pixels.
+  for (int lin_idx = 0; lin_idx < depth_frame.numel(); lin_idx++) {
+    EXPECT_NEAR(depth_frame(lin_idx), 1.0f, kFloatEpsilon);
+  }
+}
+
+TEST_F(SceneTest, PlaneSceneVerticalOffset) {
+  constexpr float max_dist = 4.0;
+
+  // Create a scene that's just a plane.
+  // Plane at 1.0 in the positive x direction, pointing in -x.
+  scene_.addPrimitive(std::make_unique<primitives::Plane>(
+      Vector3f(1.0f, 0.0, 0.0), Vector3f(-1, 0, 0)));
+
+  // Get a camera pointing to this plane from (-1, 0, 0).
+  Eigen::Quaternionf rotation =
+      Eigen::Quaternionf::FromTwoVectors(Vector3f(0, 0, 1), Vector3f(1, 0, 0));
+  Vector3f translation(-1, 0, 0);
+  Transform T_S_C = Transform::Identity();
+  T_S_C.prerotate(rotation.normalized());
+  T_S_C.pretranslate(translation);
+
+  DepthImage depth_frame(camera_.height(), camera_.width(),
+                         MemoryType::kUnified);
+  scene_.generateDepthImageFromScene(camera_, T_S_C, max_dist, &depth_frame);
+
+  // Check all the pixels.
+  for (int lin_idx = 0; lin_idx < depth_frame.numel(); lin_idx++) {
+    EXPECT_NEAR(depth_frame(lin_idx), 2.0f, kFloatEpsilon);
+  }
+}
+
+int main(int argc, char** argv) {
+  google::InitGoogleLogging(argv[0]);
+  FLAGS_alsologtostderr = true;
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/nvblox/tests/test_sphere_tracing.cpp b/nvblox/tests/test_sphere_tracing.cpp
new file mode 100644
index 00000000..13f14921
--- /dev/null
+++ b/nvblox/tests/test_sphere_tracing.cpp
@@ -0,0 +1,483 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+
+#include "nvblox/core/blox.h"
+#include "nvblox/core/camera.h"
+#include "nvblox/core/common_names.h"
+#include "nvblox/core/cuda/warmup.h"
+#include "nvblox/core/layer.h"
+#include "nvblox/core/voxels.h"
+#include "nvblox/integrators/projective_tsdf_integrator.h"
+#include "nvblox/primitives/primitives.h"
+#include "nvblox/primitives/scene.h"
+#include "nvblox/ray_tracing/sphere_tracer.h"
+#include "nvblox/utils/timing.h"
+
+#include "nvblox/tests/gpu_image_routines.h"
+#include "nvblox/tests/utils.h"
+
+using namespace nvblox;
+
+class SphereTracingTest : public ::testing::Test {
+ protected:
+  SphereTracingTest()
+      : layer_(std::make_shared<TsdfLayer>(voxel_size_m_,
+                                           MemoryType::kUnified)) {
+    constexpr float fu = 300;
+    constexpr float fv = 300;
+    constexpr int width = 640;
+    constexpr int height = 480;
+    constexpr float cu = static_cast<float>(width) / 2.0f;
+    constexpr float cv = static_cast<float>(height) / 2.0f;
+    camera_ptr_ = std::make_unique<Camera>(fu, fv, cu, cv, width, height);
+  }
+
+  // Layers
+  constexpr static float voxel_size_m_ = 0.05f;
+  std::shared_ptr<TsdfLayer> layer_;
+
+  // Integration/Ground-truth distance computation
+  constexpr static float truncation_distance_vox_ = 4.0f;
+  constexpr static float truncation_distance_m_ =
+      truncation_distance_vox_ * voxel_size_m_;
+
+  // Test camera
+  std::unique_ptr<Camera> camera_ptr_;
+
+  // Scenes
+  constexpr static float scene_sphere_radius_ = 2.0f;
+  const Vector3f scene_sphere_center_ = Vector3f(0.0f, 0.0f, 2.0f);
+  primitives::Scene getSphereInBoxScene() {
+    // Scene is bounded to -6, -6, -1 to 6, 6, 6.
+    // NOTE(alexmillane): I increased the size to 6 here because if the scene
+    // has walls on the very edge, the distance just through the wall is not
+    // allocated, making the ray marching go straight through the wall. Note
+    // that in reality, we will always have some thickness to the wall (the
+    // integrator ensures that).
+    primitives::Scene scene;
+    scene.aabb() = AxisAlignedBoundingBox(Vector3f(-6.0f, -6.0f, -1.0f),
+                                          Vector3f(6.0f, 6.0f, 6.0f));
+    scene.addGroundLevel(0.0f);
+    scene.addCeiling(5.0f);
+    scene.addPrimitive(std::make_unique<primitives::Sphere>(
+        scene_sphere_center_, scene_sphere_radius_));
+    scene.addPlaneBoundaries(-5.0f, 5.0f, -5.0f, 5.0f);
+    return scene;
+  }
+  Transform getRandomViewpointInSphereInBoxScene() {
+    // - position
+    //   - inside the (-5, -5, 0) to (5, 5, 5) bounds,
+    //   - and not inside the sphere in the center
+    // - orientation
+    //   - random
+    auto is_inside_sphere = [this](const Vector3f& p) -> bool {
+      return (p - scene_sphere_center_).norm() <= scene_sphere_radius_;
+    };
+    Vector3f p_L = Vector3f::Zero();
+    while (is_inside_sphere(p_L)) {
+      p_L = Vector3f(test_utils::randomFloatInRange(-4.75, 4.75),
+                     test_utils::randomFloatInRange(-4.75, 4.75),
+                     test_utils::randomFloatInRange(0.25, 4.75));
+    }
+    Transform T_S_C = Transform::Identity();
+    T_S_C.prerotate(
+        Eigen::AngleAxisf(test_utils::randomFloatInRange(-M_PI, M_PI),
+                          test_utils::getRandomUnitVector3f())
+            .toRotationMatrix());
+    T_S_C.pretranslate(p_L);
+    return T_S_C;
+  }
+  void getSphereSceneReconstruction(const primitives::Scene& scene,
+                                    TsdfLayer* layer_ptr) {
+    // Objects to perform the reconstruction
+    ProjectiveTsdfIntegrator integrator;
+    integrator.truncation_distance_vox(truncation_distance_vox_);
+    DepthImage depth_image(camera_ptr_->height(), camera_ptr_->width(),
+                           MemoryType::kUnified);
+
+    // Fuse in viewpoints
+    constexpr float kTrajectoryRadius = 4.0f;
+    constexpr float kTrajectoryHeight = 2.0f;
+    constexpr int kNumTrajectoryPoints = 80;
+    const float radians_increment = 2 * M_PI / (kNumTrajectoryPoints);
+    for (size_t i = 0; i < kNumTrajectoryPoints; i++) {
+      const float theta = radians_increment * i;
+      // Convert polar to cartesian coordinates.
+      Vector3f cartesian_coordinates(kTrajectoryRadius * std::cos(theta),
+                                     kTrajectoryRadius * std::sin(theta),
+                                     kTrajectoryHeight);
+      // The camera has its z axis pointing towards the origin.
+      Eigen::Quaternionf rotation_base(0.5, 0.5, 0.5, 0.5);
+      Eigen::Quaternionf rotation_theta(
+          Eigen::AngleAxisf(M_PI + theta, Vector3f::UnitZ()));
+
+      // Construct a transform from camera to scene with this.
+      Transform T_S_C = Transform::Identity();
+      T_S_C.prerotate(rotation_theta * rotation_base);
+      T_S_C.pretranslate(cartesian_coordinates);
+
+      // Generate a depth image of the scene.
+      SphereTracer::Params params;
+      scene.generateDepthImageFromScene(
+          *camera_ptr_, T_S_C, params.maximum_ray_length_m, &depth_image);
+
+      // Integrate this depth image.
+      integrator.integrateFrame(depth_image, T_S_C, *camera_ptr_, layer_ptr);
+    }
+  }
+};
+
+class SphereTracingInScaledDistanceFieldTest
+    : public SphereTracingTest,
+      public ::testing::WithParamInterface<float> {
+ protected:
+  // Yo dawg I heard you like params
+};
+
+TEST_P(SphereTracingInScaledDistanceFieldTest, PlaneTest) {
+  // Get the test param
+  const float distance_scaling = GetParam();
+
+  // Test params
+  constexpr float kAllowableErrorInDistanceAlongRay = voxel_size_m_;
+
+  constexpr float kVolumeHalfSize = 5.0f;
+  constexpr float kMinDistanceFromPlane = 1.0f;
+  constexpr float kMinDistanceFromVolumeEdges = 0.5;
+
+  // Scene
+  primitives::Scene scene;
+  scene.aabb() = AxisAlignedBoundingBox(
+      Vector3f(-kVolumeHalfSize, -kVolumeHalfSize, -kVolumeHalfSize),
+      Vector3f(kVolumeHalfSize, kVolumeHalfSize, kVolumeHalfSize));
+  // Create a scene with a plane in the center.
+  Vector3f plane_center(0.0f, 0.0f, 0.0f);
+  Vector3f plane_normal(-1.0f, 0.0f, 0.0f);
+  scene.addPrimitive(
+      std::make_unique<primitives::Plane>(plane_center, plane_normal));
+
+  // Get the ground truth SDF for it.
+  const float truncation_distance_m = 5.0;
+  scene.generateSdfFromScene(truncation_distance_m, layer_.get());
+
+  // Scale the distance for all voxels in the layer
+  auto scaling_lambda = [distance_scaling](const Index3D&, const Index3D&,
+                                           TsdfVoxel* voxel) {
+    voxel->distance *= distance_scaling;
+  };
+  callFunctionOnAllVoxels(layer_.get(), scaling_lambda);
+
+  // Sphere tracer
+  SphereTracer sphere_tracer_gpu;
+
+  // Test a number of random rays
+  constexpr int kNumTests = 1000;
+  int num_converged = 0;
+  int num_error_too_high = 0;
+  for (int i = 0; i < kNumTests; i++) {
+    // Get a random point on the plane (and in volume)
+    Vector3f p_plane_L(0.0f,  // NOLINT
+                       test_utils::randomFloatInRange(
+                           -(kVolumeHalfSize - kMinDistanceFromVolumeEdges),
+                           (kVolumeHalfSize - kMinDistanceFromVolumeEdges)),
+                       test_utils::randomFloatInRange(
+                           -(kVolumeHalfSize - kMinDistanceFromVolumeEdges),
+                           (kVolumeHalfSize - kMinDistanceFromVolumeEdges)));
+
+    // Get a random point in the volume (positive side of the plane)
+    Vector3f p_volume_L(test_utils::randomFloatInRange(
+                            -(kVolumeHalfSize - kMinDistanceFromVolumeEdges),
+                            -kMinDistanceFromPlane),
+                        test_utils::randomFloatInRange(
+                            -(kVolumeHalfSize - kMinDistanceFromVolumeEdges),
+                            (kVolumeHalfSize - kMinDistanceFromVolumeEdges)),
+                        test_utils::randomFloatInRange(
+                            -(kVolumeHalfSize - kMinDistanceFromVolumeEdges),
+                            (kVolumeHalfSize - kMinDistanceFromVolumeEdges)));
+
+    // Ray between these points
+    const Ray ray{(p_plane_L - p_volume_L).normalized(), p_volume_L};
+
+    // March
+    float t;
+    const bool marcher_converged =
+        sphere_tracer_gpu.castOnGPU(ray, *layer_, truncation_distance_m_, &t);
+    EXPECT_TRUE(marcher_converged);
+    if (!marcher_converged) {
+      continue;
+    }
+
+    // GT
+    float distance_gt;
+    Vector3f ray_intersection_gt;
+    constexpr float kMaxDist = 20.0f;
+    EXPECT_TRUE(scene.getRayIntersection(ray.origin, ray.direction, kMaxDist,
+                                         &ray_intersection_gt, &distance_gt));
+
+    // Check
+    const float error = std::abs(t - distance_gt);
+    bool error_too_high = error > kAllowableErrorInDistanceAlongRay;
+
+    // Tally
+    if (marcher_converged) ++num_converged;
+    if (error_too_high) ++num_error_too_high;
+  }
+
+  constexpr float kAllowableErrorTooHighPercentage = 0.5f;
+  float percentage_error_too_high = num_error_too_high * 100.0f / kNumTests;
+  EXPECT_LT(percentage_error_too_high, kAllowableErrorTooHighPercentage);
+
+  LOG(INFO) << "num_converged: " << num_converged << " / " << kNumTests;
+  LOG(INFO) << "num_error_too_high: " << num_error_too_high << " / "
+            << kNumTests;
+}
+
+INSTANTIATE_TEST_CASE_P(PlaneTests, SphereTracingInScaledDistanceFieldTest,
+                        ::testing::Values(0.9f, 1.0f, 1.1f));
+
+enum class DistanceFieldType { kGroundTruth, kReconstruction };
+
+class SphereTracingInSphereSceneTest
+    : public SphereTracingTest,
+      public ::testing::WithParamInterface<DistanceFieldType> {
+ protected:
+  // Yo dawg I heard you like params
+};
+
+TEST_P(SphereTracingInSphereSceneTest, SphereSceneTests) {
+  // Scene
+  primitives::Scene scene = getSphereInBoxScene();
+
+  // Distance Field type
+  const DistanceFieldType field_type = GetParam();
+
+  // Distance field (GT or reconstruction)
+  if (field_type == DistanceFieldType::kGroundTruth) {
+    std::cout << "Testing on Ground Truth distance field." << std::endl;
+    scene.generateSdfFromScene(truncation_distance_m_, layer_.get());
+  } else {
+    std::cout << "Testing on reconstructed distance field." << std::endl;
+    getSphereSceneReconstruction(scene, layer_.get());
+  }
+
+  // Sphere tracer
+  SphereTracer sphere_tracer_gpu;
+
+  // Declare the images here so we have access to them after the tests
+  std::shared_ptr<DepthImage> depth_image_sphere_traced_ptr;
+  DepthImage depth_frame_gt(camera_ptr_->height(), camera_ptr_->width(),
+                            MemoryType::kUnified);
+  DepthImage diff;
+
+  constexpr int kNumImages = 10;
+  for (int i = 0; i < kNumImages; i++) {
+    // Random view points
+    const Transform T_S_C = getRandomViewpointInSphereInBoxScene();
+
+    // Generate a sphere traced image
+    timing::Timer render_timer("render");
+    depth_image_sphere_traced_ptr = sphere_tracer_gpu.renderImageOnGPU(
+        *camera_ptr_, T_S_C, *layer_, truncation_distance_m_,
+        MemoryType::kUnified);
+    render_timer.Stop();
+
+    // Generate a GT image
+    scene.generateDepthImageFromScene(
+        *camera_ptr_, T_S_C, sphere_tracer_gpu.params().maximum_ray_length_m,
+        &depth_frame_gt);
+
+    // Error image
+    test_utils::getDifferenceImageOnGPU(*depth_image_sphere_traced_ptr,
+                                        depth_frame_gt, &diff);
+
+    // Count the number of error pixels.
+    // NOTE(alexmillane): We ignore rays that do not converge (this occurs in
+    // small regions of the reconstruction test).
+    constexpr float kErrorPixelThreshold = 4.0f * voxel_size_m_;
+    int num_error_pixels = 0;
+    int num_rays_converged = 0;
+    for (int i = 0; i < diff.numel(); i++) {
+      if ((*depth_image_sphere_traced_ptr)(i) > 0.0f) {
+        ++num_rays_converged;
+        if (diff(i) > kErrorPixelThreshold) {
+          ++num_error_pixels;
+        }
+      }
+    }
+    const float percentage_rays_converged =
+        static_cast<float>(num_rays_converged) * 100.0f / diff.numel();
+    const float percentage_error_pixels =
+        static_cast<float>(num_error_pixels) * 100.0f / diff.numel();
+    std::cout << "percentage of rays converged = "
+              << percentage_rays_converged << "%" << std::endl;
+    std::cout << "number of pixels with error > " << kErrorPixelThreshold
+              << ": " << num_error_pixels << " = " << percentage_error_pixels
+              << "%" << std::endl;
+
+    constexpr float kPercentagePixelsFailThreshold = 2.0f;  // percent!
+    EXPECT_LT(percentage_error_pixels, kPercentagePixelsFailThreshold);
+
+    // In the groundtruth distance field we expect all rays to converge
+    if (field_type == DistanceFieldType::kGroundTruth) {
+      EXPECT_GT(percentage_rays_converged, 99.5f);
+    }
+  }
+
+  std::cout << timing::Timing::Print() << std::endl;
+
+  // Write Images
+  io::writeToCsv("sphere_tracing_image.txt", *depth_image_sphere_traced_ptr);
+  io::writeToCsv("sphere_tracing_gt.txt", depth_frame_gt);
+  io::writeToCsv("sphere_tracing_diff.txt", diff);
+
+  // Write Scene
+  // io::outputVoxelLayerToPly(*layer_, "sphere_tracing_scene.ply");
+}
+
+INSTANTIATE_TEST_CASE_P(SphereSceneTests, SphereTracingInSphereSceneTest,
+                        ::testing::Values(DistanceFieldType::kGroundTruth,
+                                          DistanceFieldType::kReconstruction));
+
+void generateErrorImageFromSubsampledImage(const DepthImage& original,
+                                           const DepthImage& subsampled,
+                                           DepthImage* diff) {
+  CHECK_EQ(original.rows(), diff->rows());
+  CHECK_EQ(original.cols(), diff->cols());
+  CHECK_EQ(original.rows() % subsampled.rows(), 0);
+  CHECK_EQ(original.cols() % subsampled.cols(), 0);
+
+  const int subsampling_factor = original.rows() / subsampled.rows();
+  CHECK_EQ(original.cols() / subsampled.cols(), subsampling_factor);
+
+  for (int row_idx = 0; row_idx < original.rows(); row_idx++) {
+    for (int col_idx = 0; col_idx < original.cols(); col_idx++) {
+      const Vector2f u_px_subsampled(
+          static_cast<float>(col_idx) /
+                  static_cast<float>(subsampling_factor) +
+              0.5,
+          static_cast<float>(row_idx) /
+                  static_cast<float>(subsampling_factor) +
+              0.5);
+
+      const float original_depth = original(row_idx, col_idx);
+      const bool original_success = original_depth > 0.0f;
+
+      float subsampled_depth;
+      const bool subsampled_success = interpolation::interpolate2DLinear(
+          subsampled, u_px_subsampled, &subsampled_depth);
+
+      if (original_success && subsampled_success) {
+        const float depth_error = std::abs(original_depth - subsampled_depth);
+        (*diff)(row_idx, col_idx) = depth_error;
+      } else {
+        (*diff)(row_idx, col_idx) = -1.0f;
+      }
+    }
+  }
+}
+
+float getPercentageErrorPixels(const DepthImage& diff,
+                               const float error_threshold) {
+  int num_error_pixels = 0;
+  for (int i = 0; i < diff.numel(); i++) {
+    if (diff(i) > 0.0f) {
+      if (diff(i) > error_threshold) {
+        ++num_error_pixels;
+      }
+    }
+  }
+  return static_cast<float>(num_error_pixels) /
+         static_cast<float>(diff.numel()) * 100.0f;
+}
+
+TEST_F(SphereTracingTest, SubsamplingTest) {
+  // Scene
+  primitives::Scene scene = getSphereInBoxScene();
+
+  // Distance field (GT or reconstruction)
+  getSphereSceneReconstruction(scene, layer_.get());
+
+  // Sphere tracer
+  SphereTracer sphere_tracer_gpu;
+
+  // Declare the images here so we have access to them after the tests
+  DepthImage depth_image_full;
+  DepthImage depth_image_half;
+  DepthImage depth_image_quarter;
+  DepthImage diff(camera_ptr_->rows(), camera_ptr_->cols(),
+                  MemoryType::kUnified);
+
+  constexpr int kNumImages = 10;
+  for (int i = 0; i < kNumImages; i++) {
+    // Random view points
+    const Transform T_S_C = getRandomViewpointInSphereInBoxScene();
+
+    // Generate a sphere traced image
+    timing::Timer render_timer_full("render/full");
+    depth_image_full = *sphere_tracer_gpu.renderImageOnGPU(
+        *camera_ptr_, T_S_C, *layer_, truncation_distance_m_,
+        MemoryType::kUnified);
+    render_timer_full.Stop();
+
+    timing::Timer render_timer_half("render/half");
+    depth_image_half = *sphere_tracer_gpu.renderImageOnGPU(
+        *camera_ptr_, T_S_C, *layer_, truncation_distance_m_,
+        MemoryType::kUnified, 2);
+    render_timer_half.Stop();
+
+    timing::Timer render_timer_quarter("render/quarter");
+    depth_image_quarter = *sphere_tracer_gpu.renderImageOnGPU(
+        *camera_ptr_, T_S_C, *layer_, truncation_distance_m_,
+        MemoryType::kUnified, 4);
+    render_timer_quarter.Stop();
+
+    // Errors
+    constexpr float kErrorPixelThreshold = 0.5;
+    generateErrorImageFromSubsampledImage(depth_image_full, depth_image_half,
+                                          &diff);
+    const float percentage_error_pixels_half =
+        getPercentageErrorPixels(diff, kErrorPixelThreshold);
+
+    generateErrorImageFromSubsampledImage(depth_image_full,
+                                          depth_image_quarter, &diff);
+    const float percentage_error_pixels_quarter =
+        getPercentageErrorPixels(diff, kErrorPixelThreshold);
+
+    std::cout << "percentage_error_pixels_half: "
+              << percentage_error_pixels_half << std::endl;
+    std::cout << "percentage_error_pixels_quarter: "
+              << percentage_error_pixels_quarter << std::endl;
+
+    EXPECT_LT(percentage_error_pixels_half, 2.0f);
+    EXPECT_LT(percentage_error_pixels_quarter, 4.0f);
+  }
+
+  std::cout << timing::Timing::Print() << std::endl;
+
+  // Write Images
+  io::writeToCsv("sphere_tracing_image_full.csv", depth_image_full);
+  io::writeToCsv("sphere_tracing_image_half.csv", depth_image_half);
+  io::writeToCsv("sphere_tracing_image_quarter.csv", depth_image_quarter);
+  io::writeToCsv("sphere_tracing_image_subsampling_diff.csv", diff);
+}
+
+int main(int argc, char** argv) {
+  google::InitGoogleLogging(argv[0]);
+  FLAGS_alsologtostderr = true;
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  warmupCuda();
+  return RUN_ALL_TESTS();
+}
diff --git a/nvblox/tests/test_traits.cpp b/nvblox/tests/test_traits.cpp
new file mode 100644
index 00000000..88111c25
--- /dev/null
+++ b/nvblox/tests/test_traits.cpp
@@ -0,0 +1,62 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+
+#include "nvblox/core/blox.h"
+#include "nvblox/core/common_names.h"
+#include "nvblox/core/layer.h"
+#include "nvblox/core/voxels.h"
+
+using namespace nvblox;
+
+struct NewVoxel {
+  float voxel_data = 0.0f;
+};
+
+struct NewBlock {
+  typedef unified_ptr<NewBlock> Ptr;
+  typedef unified_ptr<const NewBlock> ConstPtr;
+
+  static Ptr allocate(MemoryType memory_type) {
+    return make_unified<NewBlock>(memory_type);
+  };
+
+  float block_data = 0.0f;
+};
+
+using NewVoxelLayer = VoxelBlockLayer<NewVoxel>;
+using NewBlockLayer = BlockLayer<NewBlock>;
+
+TEST(TraitsTest, LayerTraits) {
+  // Existing layer types
+  EXPECT_TRUE(traits::is_voxel_layer<TsdfLayer>());
+  EXPECT_FALSE(traits::is_voxel_layer<MeshLayer>());
+  EXPECT_TRUE(traits::is_voxel_layer<ColorLayer>());
+  EXPECT_FALSE(traits::is_voxel_layer<MeshLayer>());
+  // New layer types
+  static_assert(traits::is_voxel_layer<NewVoxelLayer>());
+  static_assert(!traits::is_voxel_layer<NewBlockLayer>());
+  static_assert(traits::is_voxel_layer<VoxelBlockLayer<NewVoxel>>());
+  static_assert(!traits::is_voxel_layer<BlockLayer<NewBlock>>());
+}
+
+int main(int argc, char** argv) {
+  FLAGS_alsologtostderr = true;
+  google::InitGoogleLogging(argv[0]);
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/nvblox/tests/test_tsdf_integrator.cpp b/nvblox/tests/test_tsdf_integrator.cpp
new file mode 100644
index 00000000..4453bad2
--- /dev/null
+++ b/nvblox/tests/test_tsdf_integrator.cpp
@@ -0,0 +1,391 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+#include <cmath>
+
+#include "nvblox/core/camera.h"
+#include "nvblox/core/image.h"
+#include "nvblox/core/interpolation_3d.h"
+#include "nvblox/core/layer.h"
+#include "nvblox/core/types.h"
+#include "nvblox/core/voxels.h"
+#include "nvblox/integrators/projective_tsdf_integrator.h"
+#include "nvblox/io/csv.h"
+#include "nvblox/io/ply_writer.h"
+#include "nvblox/io/pointcloud_io.h"
+#include "nvblox/primitives/scene.h"
+
+#include "nvblox/tests/projective_tsdf_integrator_cpu.h"
+#include "nvblox/tests/utils.h"
+
+using namespace nvblox;
+
+DECLARE_bool(alsologtostderr);
+
+constexpr float kInterpolatedSurfaceDistanceEpsilon = 1e-6;
+
+// Parameters which define a passing test which measures the difference between
+// CPU and GPU integration. We accept a test if fewer than
+// (kAcceptablePercentageOverThreshold)% of voxels differ by more than
+// (kGPUvsCPUDifferenceThresholdM) meters.
+// NOTE(alexmillane): Obviously there are some slight differences in the voxel
+// distance values in a small number of voxels.
+constexpr float kGPUvsCPUDifferenceThresholdM = 1e-3;      // 1mm
+constexpr float kAcceptablePercentageOverThreshold = 0.4;  // 0.4%
+
+class TsdfIntegratorTest : public ::testing::Test {
+ protected:
+  TsdfIntegratorTest()
+      : layer_(voxel_size_m_, MemoryType::kUnified),
+        camera_(Camera(fu_, fv_, cu_, cv_, width_, height_)) {}
+
+  // Test layer
+  constexpr static float voxel_size_m_ = 0.2;
+  TsdfLayer layer_;
+
+  // How much error we expect on the surface
+  constexpr static float surface_reconstruction_allowable_distance_error_vox_ =
+      2.0f;
+  constexpr static float surface_reconstruction_allowable_distance_error_m_ =
+      surface_reconstruction_allowable_distance_error_vox_ * voxel_size_m_;
+
+  // Test camera
+  constexpr static float fu_ = 300;
+  constexpr static float fv_ = 300;
+  constexpr static int width_ = 640;
+  constexpr static int height_ = 480;
+  constexpr static float cu_ = static_cast<float>(width_) / 2.0f;
+  constexpr static float cv_ = static_cast<float>(height_) / 2.0f;
+  Camera camera_;
+};
+
+class TsdfIntegratorTestParameterized
+    : public TsdfIntegratorTest,
+      public ::testing::WithParamInterface<DeviceType> {
+  // Add a param
+};
+
+// ProjectiveTsdfIntegrator child that gives the tests access to the internal
+// functions.
+class TestProjectiveTsdfIntegrator : public ProjectiveTsdfIntegrator {
+ public:
+  TestProjectiveTsdfIntegrator() : ProjectiveTsdfIntegrator() {}
+  FRIEND_TEST(TsdfIntegratorTest, BlocksInView);
+};
+
+struct Plane {
+  Plane(const Vector3f& _p, const Vector3f& _n) : p(_p), n(_n){};
+
+  static Plane RandomAtPoint(const Vector3f& p) {
+    return Plane(p, Vector3f::Random().normalized());
+  }
+
+  const Vector3f p;
+  const Vector3f n;
+};
+
+DepthImage matrixToDepthImage(const Eigen::MatrixXf& mat) {
+  DepthImage depth_frame(mat.rows(), mat.cols(), MemoryType::kUnified);
+  for (int col_idx = 0; col_idx < mat.cols(); col_idx++) {
+    for (int row_idx = 0; row_idx < mat.rows(); row_idx++) {
+      depth_frame(row_idx, col_idx) = mat(row_idx, col_idx);
+    }
+  }
+  return depth_frame;
+}
+
+Eigen::MatrixX3f backProjectToPlaneVectorized(
+    const Eigen::MatrixX2f& uv_coordinates, const Plane& plane,
+    const Camera& camera) {
+  CHECK((uv_coordinates.col(0).array() >= 0.0f).all() &&
+        (uv_coordinates.col(0).array() < camera.width()).all());
+  CHECK((uv_coordinates.col(1).array() >= 0.0f).all() &&
+        (uv_coordinates.col(1).array() < camera.height()).all());
+  // Plane-ray intersection
+  Eigen::ArrayX3f rays_matrix(uv_coordinates.rows(), 3);
+  rays_matrix.col(0) =
+      (uv_coordinates.col(0).array() - camera.cu()) / camera.fu();
+  rays_matrix.col(1) =
+      (uv_coordinates.col(1).array() - camera.cv()) / camera.fv();
+  rays_matrix.col(2) = 1.0f;
+  const Eigen::ArrayXf t_matrix =
+      plane.p.dot(plane.n) *
+      (rays_matrix.col(0) * plane.n.x() + rays_matrix.col(1) * plane.n.y() +
+       rays_matrix.col(2) * plane.n.z())
+          .inverse();
+  // Each pixel's 3D point
+  return rays_matrix.colwise() * t_matrix;
+}
+
+DepthImage getDepthImage(const Plane& plane, const Camera& camera) {
+  CHECK(plane.p.z() > 0.0f);
+  // Enumerate all pixel locations.
+  Eigen::MatrixX2f uv_coordinates(camera.height() * camera.width(), 2);
+  int linear_idx = 0;
+  for (int u = 0; u < camera.width(); u++) {
+    for (int v = 0; v < camera.height(); v++) {
+      uv_coordinates(linear_idx, 0) = u;
+      uv_coordinates(linear_idx, 1) = v;
+      ++linear_idx;
+    }
+  }
+  // Back project and get depth frame
+  const Eigen::MatrixX3f points_C =
+      backProjectToPlaneVectorized(uv_coordinates, plane, camera);
+  Eigen::MatrixXf depths = (points_C.col(2).array());
+  depths.resize(camera.height(), camera.width());
+  return matrixToDepthImage(depths);
+}
+
+Eigen::MatrixX2f getRandomPixelLocations(const int num_samples,
+                                         const Camera& camera) {
+  // Note: Eigen's Random() generates numbers between -1.0 and 1.0 -> hence the
+  // abs().
+  Eigen::MatrixX2f uv_coordinates =
+      Eigen::MatrixX2f::Random(num_samples, 2).array().abs();
+  constexpr int border_px = 20;
+  uv_coordinates.col(0) =
+      (uv_coordinates.col(0) *
+       static_cast<float>(camera.width() - 1 - 2 * border_px))
+          .array() +
+      border_px;
+  uv_coordinates.col(1) =
+      (uv_coordinates.col(1) *
+       static_cast<float>(camera.height() - 1 - 2 * border_px))
+          .array() +
+      border_px;
+  return uv_coordinates;
+}
+
+TEST_P(TsdfIntegratorTestParameterized, ReconstructPlane) {
+  // Make sure this is deterministic.
+  std::srand(0);
+
+  // Get the params
+  const DeviceType device_type = GetParam();
+
+  // Plane centered at (0,0,depth) with random (slight) slant
+  const Plane plane =
+      Plane(Vector3f(0.0f, 0.0f, 5.0f),
+            Vector3f(test_utils::randomFloatInRange(-0.25, 0.25),
+                     test_utils::randomFloatInRange(-0.25, 0.25), -1.0f));
+
+  // Get a depth map of our view of the plane.
+  const DepthImage depth_frame = getDepthImage(plane, camera_);
+
+  std::string filepath = "./depth_frame.csv";
+  io::writeToCsv(filepath, depth_frame);
+
+  // Integrate into a layer
+  std::unique_ptr<ProjectiveTsdfIntegrator> integrator_ptr;
+  if (device_type == DeviceType::kCPU) {
+    integrator_ptr = std::make_unique<ProjectiveTsdfIntegratorCPU>();
+  } else {
+    integrator_ptr = std::make_unique<ProjectiveTsdfIntegrator>();
+  }
+  const Transform T_L_C = Transform::Identity();
+
+  integrator_ptr->truncation_distance_vox(10.0f);
+  integrator_ptr->integrateFrame(depth_frame, T_L_C, camera_, &layer_);
+
+  // Sample some points on the plane, within the camera view.
+  constexpr int kNumberOfPointsToCheck = 1000;
+  const Eigen::MatrixX2f u_random_C =
+      getRandomPixelLocations(kNumberOfPointsToCheck, camera_);
+  const Eigen::MatrixX3f p_check_L =
+      backProjectToPlaneVectorized(u_random_C, plane, camera_);
+
+  // Get the distance of these surface points
+  std::vector<Vector3f> points_L;
+  points_L.reserve(p_check_L.rows());
+  for (int i = 0; i < p_check_L.rows(); i++) {
+    points_L.push_back(p_check_L.row(i));
+  }
+  std::vector<float> distances;
+  std::vector<bool> success_flags;
+  interpolation::interpolateOnCPU(points_L, layer_, &distances,
+                                  &success_flags);
+  EXPECT_EQ(success_flags.size(), kNumberOfPointsToCheck);
+  EXPECT_EQ(distances.size(), success_flags.size());
+
+  // Check that something actually got integrated
+  float max_distance = std::numeric_limits<float>::min();
+  auto lambda = [&max_distance](const Index3D& block_index,
+                                const Index3D& voxel_index,
+                                const TsdfVoxel* voxel) {
+    if (voxel->distance > max_distance) max_distance = voxel->distance;
+  };
+  callFunctionOnAllVoxels(layer_, lambda);
+  const float nothing_integrator_indicator = voxel_size_m_;
+  EXPECT_GT(max_distance, nothing_integrator_indicator);
+
+  // Check that all interpolations worked and that the distance is close to
+  // zero
+  int num_failures = 0;
+  int num_bad_flags = 0;
+  for (int i = 0; i < distances.size(); i++) {
+    EXPECT_TRUE(success_flags[i]);
+    EXPECT_NEAR(distances[i], 0.0f,
+                surface_reconstruction_allowable_distance_error_m_);
+    if (!success_flags[i]) {
+      num_bad_flags++;
+    }
+    if (std::abs(distances[i]) >
+        surface_reconstruction_allowable_distance_error_m_) {
+      num_failures++;
+    }
+  }
+  LOG(INFO) << "Num points greater than allowable distance: " << num_failures;
+  LOG(INFO) << "num_bad_flags: " << num_bad_flags << " / " << distances.size();
+}
+
+TEST_F(TsdfIntegratorTest, SphereSceneTest) {
+  constexpr float kSphereRadius = 2.0f;
+  constexpr float kTrajectoryRadius = 4.0f;
+  constexpr float kTrajectoryHeight = 2.0f;
+  constexpr int kNumTrajectoryPoints = 80;
+  constexpr float kTruncationDistanceVox = 2;
+  constexpr float kTruncationDistanceMeters =
+      kTruncationDistanceVox * voxel_size_m_;
+  // Maximum distance to consider for scene generation.
+  constexpr float kMaxDist = 10.0;
+  constexpr float kMinWeight = 1.0;
+
+  // Tolerance for error.
+  constexpr float kDistanceErrorTolerance = kTruncationDistanceMeters;
+
+  // Scene is bounded to -5, -5, 0 to 5, 5, 5.
+  primitives::Scene scene;
+  scene.aabb() = AxisAlignedBoundingBox(Vector3f(-5.0f, -5.0f, 0.0f),
+                                        Vector3f(5.0f, 5.0f, 5.0f));
+  // Create a scene with a ground plane and a sphere.
+  scene.addGroundLevel(0.0f);
+  scene.addCeiling(5.0f);
+  scene.addPrimitive(
+      std::make_unique<primitives::Sphere>(Vector3f(0.0f, 0.0f, 2.0f), 2.0f));
+  // Add bounding planes at 5 meters. Basically makes it sphere in a box.
+  scene.addPlaneBoundaries(-5.0f, 5.0f, -5.0f, 5.0f);
+
+  // Get the ground truth SDF for it.
+  TsdfLayer gt_layer(voxel_size_m_, MemoryType::kUnified);
+  scene.generateSdfFromScene(kTruncationDistanceMeters, &gt_layer);
+
+  // Create an integrator.
+  ProjectiveTsdfIntegratorCPU integrator_cpu;
+  ProjectiveTsdfIntegrator integrator_gpu;
+  integrator_cpu.truncation_distance_vox(kTruncationDistanceVox);
+  integrator_gpu.truncation_distance_vox(kTruncationDistanceVox);
+
+  // Simulate a trajectory of the requisite number of points, on the circle
+  // around the sphere.
+  const float radians_increment = 2 * M_PI / (kNumTrajectoryPoints);
+
+  // Create a depth frame. We share this memory buffer for the entire
+  // trajectory.
+  DepthImage depth_frame(camera_.height(), camera_.width(),
+                         MemoryType::kUnified);
+
+  // Two layers, one for CPU integration and one for GPU integration
+  TsdfLayer layer_cpu(layer_.voxel_size(), MemoryType::kUnified);
+  TsdfLayer layer_gpu(layer_.voxel_size(), MemoryType::kUnified);
+
+  for (size_t i = 0; i < kNumTrajectoryPoints; i++) {
+    const float theta = radians_increment * i;
+    // Convert polar to cartesian coordinates.
+    Vector3f cartesian_coordinates(kTrajectoryRadius * std::cos(theta),
+                                   kTrajectoryRadius * std::sin(theta),
+                                   kTrajectoryHeight);
+    // The camera has its z axis pointing towards the origin.
+    Eigen::Quaternionf rotation_base(0.5, 0.5, 0.5, 0.5);
+    Eigen::Quaternionf rotation_theta(
+        Eigen::AngleAxisf(M_PI + theta, Vector3f::UnitZ()));
+
+    // Construct a transform from camera to scene with this.
+    Transform T_S_C = Transform::Identity();
+    T_S_C.prerotate(rotation_theta * rotation_base);
+    T_S_C.pretranslate(cartesian_coordinates);
+
+    // Generate a depth image of the scene.
+    scene.generateDepthImageFromScene(camera_, T_S_C, kMaxDist, &depth_frame);
+
+    // Integrate this depth image.
+    integrator_cpu.integrateFrame(depth_frame, T_S_C, camera_, &layer_cpu);
+    integrator_gpu.integrateFrame(depth_frame, T_S_C, camera_, &layer_gpu);
+  }
+
+  // Now do some checks...
+  // Check every voxel in the map.
+  auto lambda = [&](const Index3D& block_index, const Index3D& voxel_index,
+                    const TsdfVoxel* voxel) {
+    if (voxel->weight >= kMinWeight) {
+      // Get the corresponding point from the GT layer.
+      const TsdfVoxel* gt_voxel =
+          getVoxelAtBlockAndVoxelIndex(gt_layer, block_index, voxel_index);
+      if (gt_voxel != nullptr) {
+        EXPECT_NEAR(voxel->distance, gt_voxel->distance,
+                    kDistanceErrorTolerance);
+      }
+    }
+  };
+  callFunctionOnAllVoxels(layer_cpu, lambda);
+  callFunctionOnAllVoxels(layer_gpu, lambda);
+
+  io::outputVoxelLayerToPly(layer_gpu, "test_tsdf_projective_gpu.ply");
+  io::outputVoxelLayerToPly(layer_cpu, "test_tsdf_projective_cpu.ply");
+  io::outputVoxelLayerToPly(gt_layer, "test_tsdf_projective_gt.ply");
+
+  // Compare the layers
+  ASSERT_GE(layer_cpu.numAllocatedBlocks(), layer_gpu.numAllocatedBlocks());
+  size_t total_num_voxels_observed = 0;
+  size_t num_voxels_over_threshold = 0;
+  for (const Index3D& block_index : layer_gpu.getAllBlockIndices()) {
+    const auto block_cpu = layer_cpu.getBlockAtIndex(block_index);
+    const auto block_gpu = layer_gpu.getBlockAtIndex(block_index);
+    for (int x = 0; x < VoxelBlock<TsdfVoxel>::kVoxelsPerSide; x++) {
+      for (int y = 0; y < VoxelBlock<TsdfVoxel>::kVoxelsPerSide; y++) {
+        for (int z = 0; z < VoxelBlock<TsdfVoxel>::kVoxelsPerSide; z++) {
+          const float diff = block_gpu->voxels[x][y][z].distance -
+                             block_cpu->voxels[x][y][z].distance;
+          // EXPECT_NEAR(diff, 0.0f, kGPUvsCPUDifferenceThresholdM);
+          if (block_gpu->voxels[x][y][z].weight > 0.0f) {
+            ++total_num_voxels_observed;
+            if (std::abs(diff) > kGPUvsCPUDifferenceThresholdM) {
+              ++num_voxels_over_threshold;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  const float percentage_over_threshold =
+      100.0f * (static_cast<float>(num_voxels_over_threshold) /
+                static_cast<float>(total_num_voxels_observed));
+  EXPECT_LT(percentage_over_threshold, kAcceptablePercentageOverThreshold);
+  LOG(INFO) << "Percentage of voxels with a difference greater than "
+            << kGPUvsCPUDifferenceThresholdM << ": "
+            << percentage_over_threshold << "%";
+  LOG(INFO) << "total_num_voxels_observed: " << total_num_voxels_observed
+            << std::endl;
+  LOG(INFO) << "num_voxels_over_threshold: " << num_voxels_over_threshold
+            << std::endl;
+}
+
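+// NOTE(editor): The acceptance criterion used above (and in the mesh-coloring
+// test earlier) is "percentage of observed voxels/vertices over a threshold".
+// A minimal sketch of that computation as a reusable helper; the name
+// percentageOverThresholdSketch is hypothetical and shown for illustration
+// only:
+static float percentageOverThresholdSketch(size_t num_over_threshold,
+                                           size_t num_observed) {
+  // Guard against division by zero when nothing was observed.
+  if (num_observed == 0) {
+    return 0.0f;
+  }
+  return 100.0f * static_cast<float>(num_over_threshold) /
+         static_cast<float>(num_observed);
+}
+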
+INSTANTIATE_TEST_CASE_P(DeviceTests, TsdfIntegratorTestParameterized,
+                        ::testing::Values(DeviceType::kCPU, DeviceType::kGPU));
+
+int main(int argc, char** argv) {
+  FLAGS_alsologtostderr = true;
+  google::InitGoogleLogging(argv[0]);
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/nvblox/tests/test_tsdf_integrator_cuda_components.cpp b/nvblox/tests/test_tsdf_integrator_cuda_components.cpp
new file mode 100644
index 00000000..9df24438
--- /dev/null
+++ b/nvblox/tests/test_tsdf_integrator_cuda_components.cpp
@@ -0,0 +1,210 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+
+#include "nvblox/core/types.h"
+#include "nvblox/primitives/primitives.h"
+#include "nvblox/primitives/scene.h"
+
+#include "nvblox/tests/projective_tsdf_integrator_cuda_components.h"
+#include "nvblox/tests/utils.h"
+
+using namespace nvblox;
+
+// TODO: Decide where to put test epsilons
+constexpr float kFloatEpsilon = 1e-4;
+constexpr float kImagePlaneEpsilon = 1e-2;
+
+class CudaTsdfIntegratorTest : public ::testing::Test {
+ protected:
+  CudaTsdfIntegratorTest()
+      : layer_(block_size_m_, MemoryType::kUnified),
+        camera_(Camera(fu_, fv_, cu_, cv_, width_, height_)) {}
+
+  // Test layer
+  constexpr static float voxel_size_m_ = 0.2;
+  constexpr static float block_size_m_ =
+      VoxelBlock<TsdfVoxel>::kVoxelsPerSide * voxel_size_m_;
+  TsdfLayer layer_;
+
+  // Test camera
+  constexpr static float fu_ = 300;
+  constexpr static float fv_ = 300;
+  constexpr static int width_ = 640;
+  constexpr static int height_ = 480;
+  constexpr static float cu_ = static_cast<float>(width_) / 2.0f;
+  constexpr static float cv_ = static_cast<float>(height_) / 2.0f;
+  Camera camera_;
+};
+
+Transform getRandomTransform() {
+  return Transform(
+      Eigen::Translation3f(Vector3f::Random()) *
+      Eigen::AngleAxisf(
+          test_utils::randomFloatInRange(-M_PI, M_PI),
+          test_utils::getRandomVector3fInRange(-1.0f, 1.0f).normalized()));
+}
+
+TEST_F(CudaTsdfIntegratorTest, GPUVectorizedPointsTransform) {
+  // Make sure this is deterministic.
+  std::srand(0);
+
+  const Transform T_B_A = getRandomTransform();
+
+  // Transform vectors on the CPU
+  constexpr int kNumVecs = 1000;
+  const Eigen::Matrix<float, 3, kNumVecs> vecs_A =
+      Eigen::Matrix<float, 3, kNumVecs>::Random();
+  const Eigen::Matrix<float, 3, kNumVecs> vecs_B =
+      (T_B_A.rotation() * vecs_A).colwise() + T_B_A.translation();
+
+  // Transform vectors on the GPU
+  const Eigen::Matrix3Xf vecs_B_from_GPU =
+      test_utils::transformPointsOnGPU(T_B_A, vecs_A);
+
+  // Check we get the same result.
+  EXPECT_NEAR((vecs_B - vecs_B_from_GPU).maxCoeff(), 0.0f, kFloatEpsilon);
+}
+
+TEST_F(CudaTsdfIntegratorTest, GpuBlockCenterProjection) {
+  std::vector<Index3D> block_indices;
+  block_indices.push_back(Index3D(0, 0, 0));
+
+  // Camera Pose
+  Transform T_C_L = Transform::Identity();
+
+  // Project on CPU
+  std::vector<test_utils::BlockProjectionResult> cpu_block_projections;
+  cpu_block_projections.reserve(block_indices.size());
+  for (const Index3D& block_index : block_indices) {
+    test_utils::BlockProjectionResult results_block;
+    int lin_voxel_idx = 0;
+    for (int x = 0; x < VoxelBlock<TsdfVoxel>::kVoxelsPerSide; x++) {
+      for (int y = 0; y < VoxelBlock<TsdfVoxel>::kVoxelsPerSide; y++) {
+        for (int z = 0; z < VoxelBlock<TsdfVoxel>::kVoxelsPerSide; z++) {
+          const Vector3f p_L = getCenterPostionFromBlockIndexAndVoxelIndex(
+              layer_.block_size(), block_index, Index3D(x, y, z));
+          const Vector3f p_C = T_C_L * p_L;
+          Eigen::Vector2f u_px;
+          if (!camera_.project(p_C, &u_px)) {
+            u_px = Eigen::Vector2f(0.0f, 0.0f);
+          }
+          results_block.row(lin_voxel_idx) = u_px;
+          ++lin_voxel_idx;
+        }
+      }
+    }
+    cpu_block_projections.push_back(results_block);
+  }
+
+  // Project on GPU
+  std::vector<test_utils::BlockProjectionResult> gpu_block_projections =
+      test_utils::projectBlocksOnGPU(block_indices, camera_, T_C_L, &layer_);
+
+  // Check
+  for (int i = 0; i < gpu_block_projections.size(); i++) {
+    EXPECT_NEAR(
+        (cpu_block_projections[i] - gpu_block_projections[i]).maxCoeff(), 0.0f,
+        kFloatEpsilon);
+  }
+}
+
+TEST_F(CudaTsdfIntegratorTest, GpuDepthImageInterpolation) {
+  // Make sure this is deterministic.
+  std::srand(0);
+
+  // The frames {depth_frame_col_indices, depth_frame_row_indices} are set up
+  // such that if you interpolate, you should get the interpolated position
+  // back. Note that here (in contrast to the 3D voxel grid) I consider that
+  // pixel indices correspond to centers (rather than low-side-corners).
+
+  DepthImage depth_frame_col_indices(height_, width_, MemoryType::kUnified);
+  DepthImage depth_frame_row_indices(height_, width_, MemoryType::kUnified);
+  for (int col_idx = 0; col_idx < width_; col_idx++) {
+    for (int row_idx = 0; row_idx < height_; row_idx++) {
+      depth_frame_col_indices(row_idx, col_idx) = col_idx;
+      depth_frame_row_indices(row_idx, col_idx) = row_idx;
+    }
+  }
+
+  // Generate random locations on the image plane
+  constexpr int kNumTests = 1000;
+  Eigen::MatrixX2f u_px_vec(kNumTests, 2);
+  for (int i = 0; i < kNumTests; i++) {
+    u_px_vec.row(i) = Vector2f(
+        test_utils::randomFloatInRange(0.0f, static_cast<float>(width_ - 1)),
+        test_utils::randomFloatInRange(0.0f, static_cast<float>(height_ - 1)));
+  }
+
+  // CPU interpolation
+  Eigen::VectorXf results_col_cpu(kNumTests);
+  Eigen::VectorXf results_row_cpu(kNumTests);
+  for (int i = 0; i < kNumTests; i++) {
+    // Interpolate x and y grids
+    EXPECT_TRUE(interpolation::interpolate2DLinear(
+        depth_frame_col_indices, u_px_vec.row(i), &results_col_cpu(i)));
+    EXPECT_TRUE(interpolation::interpolate2DLinear(
+        depth_frame_row_indices, u_px_vec.row(i), &results_row_cpu(i)));
+  }
+
+  // GPU interpolation
+  const Eigen::VectorXf results_col_gpu =
+      test_utils::interpolatePointsOnGPU(depth_frame_col_indices, u_px_vec);
+  const Eigen::VectorXf results_row_gpu =
+      test_utils::interpolatePointsOnGPU(depth_frame_row_indices, u_px_vec);
+
+  EXPECT_NEAR((results_col_cpu - results_col_gpu).maxCoeff(), 0.0f,
+              kImagePlaneEpsilon);
+  EXPECT_NEAR((results_row_cpu - results_row_gpu).maxCoeff(), 0.0f,
+              kImagePlaneEpsilon);
+}
+
+TEST_F(CudaTsdfIntegratorTest, SetBlocksOnGPU) {
+  // Make sure this is deterministic.
+  std::srand(0);
+
+  // Allocating blocks for setting
+  constexpr int kNumBlocks = 10;
+  for (int i = 0; i < kNumBlocks; i++) {
+    layer_.allocateBlockAtIndex(test_utils::getRandomIndex3dInRange(-10, 10));
+  }
+
+  // Set the voxels to their linear indices
+  test_utils::setVoxelBlockOnGPU(&layer_);
+
+  // Check
+  for (const Index3D& block_idx : layer_.getAllBlockIndices()) {
+    const auto block_ptr = layer_.getBlockAtIndex(block_idx);
+    float lin_idx = 0.0f;
+    for (int x = 0; x < VoxelBlock<TsdfVoxel>::kVoxelsPerSide; x++) {
+      for (int y = 0; y < VoxelBlock<TsdfVoxel>::kVoxelsPerSide; y++) {
+        for (int z = 0; z < VoxelBlock<TsdfVoxel>::kVoxelsPerSide; z++) {
+          EXPECT_NEAR(block_ptr->voxels[x][y][z].distance, lin_idx,
+                      kFloatEpsilon);
+          ++lin_idx;
+        }
+      }
+    }
+  }
+}
+
+int main(int argc, char** argv) {
+  FLAGS_alsologtostderr = true;
+  google::InitGoogleLogging(argv[0]);
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
\ No newline at end of file
diff --git a/nvblox/tests/test_unified_ptr.cpp b/nvblox/tests/test_unified_ptr.cpp
new file mode 100644
index 00000000..87d7267d
--- /dev/null
+++ b/nvblox/tests/test_unified_ptr.cpp
@@ -0,0 +1,210 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+
+#include "nvblox/core/accessors.h"
+#include "nvblox/core/common_names.h"
+#include "nvblox/core/types.h"
+#include "nvblox/core/unified_ptr.h"
+
+#include "nvblox/tests/increment_on_gpu.h"
+#include "nvblox/tests/test_utils_cuda.h"
+
+using namespace nvblox;
+
+void setToConstantOnCPU(const int value, const int num_elements, int ints[]) {
+  for (int i = 0; i < num_elements; i++) {
+    ints[i] = value;
+  }
+}
+
+void incrementOnCPU(const int num_elements, int ints[]) {
+  for (int i = 0; i < num_elements; i++) {
+    ints[i]++;
+  }
+}
+
+template <typename T>
+void expect_cuda_freed(T* ptr) {
+  // Note(alexmillane): Whether or not this call returns an error seems to vary
+  // between machines... Hopefully the checks below indicate that the memory
+  // has been freed.
+  cudaPointerAttributes attributes;
+  checkCudaErrors(cudaPointerGetAttributes(&attributes, ptr));
+  EXPECT_EQ(attributes.type, cudaMemoryType::cudaMemoryTypeUnregistered);
+  EXPECT_EQ(attributes.devicePointer, nullptr);
+  EXPECT_EQ(attributes.hostPointer, nullptr);
+}
+
+TEST(UnifiedPointerTest, IntTest) {
+  unified_ptr<int> hello = make_unified<int>(3);
+
+  EXPECT_TRUE(hello);
+  EXPECT_EQ(*hello, 3);
+  (*hello)++;
+  EXPECT_EQ(*hello, 4);
+
+  test_utils::incrementOnGPU(hello.get());
+
+  EXPECT_EQ(*hello, 5);
+
+  hello.reset();
+  EXPECT_FALSE(hello);
+  EXPECT_EQ(hello.get(), nullptr);
+}
+
+TEST(UnifiedPointerTest, ObjectTest) {
+  unified_ptr<std::array<int, 100>> array_of_ints =
+      make_unified<std::array<int, 100>>();
+
+  EXPECT_TRUE(array_of_ints);
+  (*array_of_ints)[3] = 4;
+  EXPECT_EQ((*array_of_ints)[3], 4);
+}
+
+TEST(UnifiedPointerTest, EmptyTest) {
+  unified_ptr<int> vec;
+  EXPECT_FALSE(vec);
+}
+
+TEST(UnifiedPointerTest, MemoryTest) {
+  int* raw_ptr;
+  {
+    // Put this in a disappearing scope.
+    unified_ptr<int> dummy_ptr(make_unified<int>(100));
+    raw_ptr = dummy_ptr.get();
+
+    // Check that the memory is allocated.
+    cudaPointerAttributes attributes;
+    cudaError_t error = cudaPointerGetAttributes(&attributes, raw_ptr);
+    EXPECT_EQ(error, cudaSuccess);
+    EXPECT_EQ(attributes.type, cudaMemoryType::cudaMemoryTypeManaged);
+  }
+
+  // Make sure the memory no longer exists now it's out of scope.
+  cudaPointerAttributes attributes;
+  cudaError_t error = cudaPointerGetAttributes(&attributes, raw_ptr);
+  // Note(alexmillane): Whether or not this call returns an error seems to vary
+  // between machines... Hopefully the checks below indicate that the memory
+  // has been freed.
+  // EXPECT_EQ(error, cudaErrorInvalidValue);
+  EXPECT_EQ(attributes.type, cudaMemoryType::cudaMemoryTypeUnregistered);
+  EXPECT_EQ(attributes.devicePointer, nullptr);
+  EXPECT_EQ(attributes.hostPointer, nullptr);
+}
+
+TEST(UnifiedPointerTest, ArrayTest) {
+  constexpr int kNumElements = 1000;
+  unified_ptr<int[]> goodbye = make_unified<int[]>(kNumElements);
+
+  constexpr int kValue = 100;
+  setToConstantOnCPU(kValue, kNumElements, goodbye.get());
+
+  for (int i = 0; i < kNumElements; i++) {
+    EXPECT_EQ(goodbye[i], kValue);
+  }
+
+  incrementOnCPU(kNumElements, goodbye.get());
+
+  for (int i = 0; i < kNumElements; i++) {
+    EXPECT_EQ(goodbye[i], kValue + 1);
+  }
+
+  test_utils::incrementOnGPU(kNumElements, goodbye.get());
+
+  for (int i = 0; i < kNumElements; i++) {
+    EXPECT_EQ(goodbye[i], kValue + 2);
+  }
+
+  int* raw_ptr = goodbye.get();
+  goodbye.reset();
+  EXPECT_FALSE(goodbye);
+  EXPECT_EQ(goodbye.get(), nullptr);
+  // Make sure the memory no longer exists now it's out of scope.
+  expect_cuda_freed(raw_ptr);
+}
+
+TEST(UnifiedPointerTest, HostTest) {
+  auto int_host_ptr = make_unified<int>(MemoryType::kHost, 1);
+  EXPECT_EQ(*int_host_ptr, 1);
+  EXPECT_NE(*int_host_ptr, 2);
+
+  cudaPointerAttributes attributes;
+  checkCudaErrors(cudaPointerGetAttributes(&attributes, int_host_ptr.get()));
+  EXPECT_EQ(attributes.type, cudaMemoryType::cudaMemoryTypeHost);
+
+  int* raw_ptr = int_host_ptr.get();
+  int_host_ptr.reset();
+  EXPECT_FALSE(int_host_ptr);
+  EXPECT_EQ(int_host_ptr.get(), nullptr);
+  expect_cuda_freed(raw_ptr);
+}
+
+TEST(UnifiedPointerTest, CloneTest) {
+  // Host-only cloning
+  auto int_ptr_1 = make_unified<int>(MemoryType::kHost, 1);
+  EXPECT_EQ(*int_ptr_1, 1);
+  auto int_ptr_2 = int_ptr_1.clone();
+  EXPECT_EQ(*int_ptr_2, 1);
+  *int_ptr_2 += 1;
+  EXPECT_EQ(*int_ptr_2, 2);
+  EXPECT_EQ(*int_ptr_1, 1);
+
+  // Host-to-device-to-unified cloning
+  auto int_ptr_3 = int_ptr_1.clone(MemoryType::kDevice);
+  test_utils::incrementOnGPU(int_ptr_3.get());
+  auto int_ptr_4 = int_ptr_3.clone(MemoryType::kUnified);
+  EXPECT_EQ(*int_ptr_4, 2);
+
+  // Array cloning
+  constexpr int kNumElems = 10;
+  auto int_ptr_5 = make_unified<int[]>(kNumElems, MemoryType::kDevice);
+  test_utils::fillWithConstant(1, kNumElems, int_ptr_5.get());
+  EXPECT_TRUE(test_utils::checkAllConstant(int_ptr_5.get(), 1, kNumElems));
+  test_utils::incrementOnGPU(kNumElems, int_ptr_5.get());
+  EXPECT_TRUE(test_utils::checkAllConstant(int_ptr_5.get(), 2, kNumElems));
+  test_utils::incrementOnGPU(kNumElems, int_ptr_5.get());
+  auto int_ptr_6 = int_ptr_5.clone(MemoryType::kHost);
+
+  for (int i = 0; i < kNumElems; i++) {
+    EXPECT_EQ(int_ptr_6[i], 3);
+  }
+}
+
+TEST(UnifiedPointerTest, CloneConstBlock) {
+  constexpr float kBlockSize = 1.0f;
+  TsdfLayer layer(kBlockSize, MemoryType::kDevice);
+  auto block_ptr = layer.allocateBlockAtIndex({0, 0, 0});
+
+  const TsdfLayer& layer_const_ref = layer;
+
+  TsdfBlock::Ptr cloned_block =
+      layer_const_ref.getBlockAtIndex({0, 0, 0}).clone(MemoryType::kHost);
+
+  auto expect_voxel_zero_lambda = [](const Index3D& voxel_index,
+                                     const TsdfVoxel* voxel) -> void {
+    EXPECT_EQ(voxel->distance, 0.0f);
+    EXPECT_EQ(voxel->weight, 0.0f);
+  };
+  callFunctionOnAllVoxels(*cloned_block, expect_voxel_zero_lambda);
+}
+
+int main(int argc, char** argv) {
+  google::InitGoogleLogging(argv[0]);
+  FLAGS_alsologtostderr = true;
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
\ No newline at end of file
diff --git a/nvblox/tests/test_unified_vector.cpp b/nvblox/tests/test_unified_vector.cpp
new file mode 100644
index 00000000..1855cd24
--- /dev/null
+++ b/nvblox/tests/test_unified_vector.cpp
@@ -0,0 +1,221 @@
+/*
+Copyright 2022 NVIDIA CORPORATION
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <gtest/gtest.h>
+
+#include <vector>
+
+#include <cuda_runtime.h>
+
+#include "nvblox/core/cuda/warmup.h"
+#include "nvblox/core/unified_vector.h"
+
+#include "nvblox/tests/increment_on_gpu.h"
+#include "nvblox/tests/test_utils_cuda.h"
+
+using namespace nvblox;
+
+cudaMemoryType getPointerMemoryType(void* pointer) {
+  // Look up which memory space (host/device/managed) the pointer lives in.
+  cudaPointerAttributes attributes;
+  cudaError_t error = cudaPointerGetAttributes(&attributes, pointer);
+  EXPECT_EQ(error, cudaSuccess);
+  return attributes.type;
+}
+
+TEST(UnifiedVectorTest, EmptyTest) {
+  unified_vector<int> vec;
+  EXPECT_TRUE(vec.empty());
+  EXPECT_EQ(vec.data(), nullptr);
+  EXPECT_EQ(vec.size(), 0);
+}
+
+TEST(UnifiedVectorTest, ClearTest) {
+  unified_vector<int> vec(100, 10);
+  EXPECT_FALSE(vec.empty());
+  EXPECT_NE(vec.data(), nullptr);
+  EXPECT_EQ(vec.size(), 100);
+  EXPECT_EQ(vec[0], 10);
+  EXPECT_EQ(vec[99], 10);
+
+  vec.clear();
+  EXPECT_TRUE(vec.empty());
+  EXPECT_EQ(vec.data(), nullptr);
+  EXPECT_EQ(vec.size(), 0);
+}
+
+TEST(UnifiedVectorTest, PushBackTest) {
+  unified_vector<int> vec;
+  constexpr size_t kMaxSize = 999999;
+  for (size_t i = 0; i < kMaxSize; i++) {
+    vec.push_back(i);
+  }
+  EXPECT_FALSE(vec.empty());
+  EXPECT_NE(vec.data(), nullptr);
+  EXPECT_EQ(vec.size(), kMaxSize);
+
+  for (size_t i = 0; i < kMaxSize; i++) {
+    EXPECT_EQ(vec[i], i);
+  }
+  EXPECT_GE(vec.capacity(), vec.size());
+}
+
+TEST(UnifiedVectorTest, AssignmentTest) {
+  unified_vector<int> vec;
+  constexpr size_t kMaxSize = 10;
+  for (size_t i = 0; i < kMaxSize; i++) {
+    vec.push_back(i);
+  }
+
+  // Copy the vector over.
+  unified_vector<int> vec2 = vec;
+
+  EXPECT_FALSE(vec2.empty());
+  EXPECT_NE(vec2.data(), nullptr);
+  EXPECT_EQ(vec2.size(), kMaxSize);
+
+  for (size_t i = 0; i < kMaxSize; i++) {
+    EXPECT_EQ(vec2[i], i);
+  }
+  EXPECT_GE(vec2.capacity(), vec.size());
+}
+
+TEST(UnifiedVectorTest, IteratorTest) {
+  unified_vector<int> vec;
+  constexpr size_t kMaxSize = 10;
+  for (size_t i = 0; i < kMaxSize; i++) {
+    vec.push_back(i);
+  }
+
+  size_t i = 0;
+  for (size_t num : vec) {
+    EXPECT_EQ(num, i++);
+  }
+  EXPECT_EQ(i, kMaxSize);
+}
+
+TEST(UnifiedVectorTest, CpuGpuReadWrite) {
+  const float value = 12.3f;
+
+  const size_t kVectorSize = 100;
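+  // A default-constructed unified_vector is backed by managed memory, so
+  // the same buffer can be verified from the host (via operator[]) and
+  // from the device below.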
+  unified_vector<float> vec;
+  vec.resize(kVectorSize);
+  test_utils::fillVectorWithConstant(value, &vec);
+
+  // Use operator[] to check on the HOST.
+  for (size_t i = 0; i < kVectorSize; i++) {
+    EXPECT_EQ(vec[i], value);
+  }
+
+  // Check on the DEVICE.
+  EXPECT_TRUE(test_utils::checkVectorAllConstant(vec, value));
+}
+
+TEST(UnifiedVectorTest, SetZeroTest) {
+  constexpr size_t kSize = 100;
+  unified_vector<int> vec(kSize, 1, MemoryType::kHost);
+  for (size_t i = 0; i < kSize; i++) {
+    EXPECT_EQ(vec[i], 1);
+  }
+  vec.setZero();
+  for (size_t i = 0; i < kSize; i++) {
+    EXPECT_EQ(vec[i], 0);
+  }
+}
+
+TEST(UnifiedVectorTest, HostToDeviceToHostCopy) {
+  constexpr size_t kSize = 100;
+  unified_vector<int> vec(kSize, MemoryType::kHost);
+  EXPECT_EQ(getPointerMemoryType(vec.data()),
+            cudaMemoryType::cudaMemoryTypeHost);
+  for (size_t i = 0; i < kSize; i++) {
+    vec[i] = i;
+  }
+
+  // Copy over to unified memory.
+  unified_vector<int> vec_unified(MemoryType::kUnified);
+  vec_unified = vec;
+  EXPECT_EQ(getPointerMemoryType(vec_unified.data()),
+            cudaMemoryType::cudaMemoryTypeManaged);
+  for (size_t i = 0; i < kSize; i++) {
+    EXPECT_EQ(vec_unified[i], i);
+    // Modify it for later.
+    vec_unified[i] = i - 1;
+  }
+
+  // Copy over to device memory.
+  unified_vector<int> vec_device(MemoryType::kDevice);
+  vec_device = vec_unified;
+  EXPECT_EQ(getPointerMemoryType(vec_device.data()),
+            cudaMemoryType::cudaMemoryTypeDevice);
+
+  unified_vector<int> vec_host(vec_device, MemoryType::kHost);
+  EXPECT_EQ(getPointerMemoryType(vec_host.data()),
+            cudaMemoryType::cudaMemoryTypeHost);
+  for (size_t i = 0; i < kSize; i++) {
+    EXPECT_EQ(vec_host[i], i - 1);
+  }
+}
+
+template <typename VectorType>
+void checkAllConstantCPU(const VectorType& vec, const int value) {
+  for (auto i : vec) {
+    EXPECT_EQ(i, value);
+  }
+}
+
+TEST(UnifiedVectorTest, HostAndDeviceVectors) {
+  // Host vector.
+  constexpr int kNumElems = 100;
+  host_vector<int> vec_host_1(kNumElems, 1);
+  checkAllConstantCPU(vec_host_1, 1);
+
+  // To device.
+  device_vector<int> vec_device = vec_host_1;
+  EXPECT_TRUE(test_utils::checkAllConstant(vec_device.data(), 1, kNumElems));
+  EXPECT_FALSE(test_utils::checkAllConstant(vec_device.data(), 2, kNumElems));
+  test_utils::incrementOnGPU(kNumElems, vec_device.data());
+  EXPECT_TRUE(test_utils::checkAllConstant(vec_device.data(), 2, kNumElems));
+
+  // Back to host.
+  host_vector<int> vec_host_2 = vec_device;
+  checkAllConstantCPU(vec_host_2, 2);
+
+  // Conversion to std::vector.
+  std::vector<int> std_vec_host_1 = vec_device.toVector();
+  checkAllConstantCPU(std_vec_host_1, 2);
+
+  // Conversion from std::vector.
+  std::vector<int> std_vec_host_2(kNumElems, 3);
+  checkAllConstantCPU(std_vec_host_2, 3);
+  device_vector<int> vec_device_2(std_vec_host_2);
+  EXPECT_TRUE(test_utils::checkAllConstant(vec_device_2.data(), 3, kNumElems));
+
+  // Checking conversion from the base class.
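+  // host_vector and device_vector both derive from unified_vector, so a
+  // device_vector can be constructed directly from a reference to the
+  // common base class.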
+  host_vector<int> std_vec_host_3(kNumElems, 4);
+  const unified_vector<int>& base_class_reference = std_vec_host_3;
+  checkAllConstantCPU(base_class_reference, 4);
+  device_vector<int> vec_device_3(base_class_reference);
+  EXPECT_TRUE(test_utils::checkAllConstant(vec_device_3.data(), 4, kNumElems));
+}
+
+int main(int argc, char** argv) {
+  warmupCuda();
+  google::InitGoogleLogging(argv[0]);
+  FLAGS_alsologtostderr = true;
+  google::InstallFailureSignalHandler();
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
\ No newline at end of file
diff --git a/nvblox/thirdparty/eigen/eigen.cmake b/nvblox/thirdparty/eigen/eigen.cmake
new file mode 100644
index 00000000..9cec9191
--- /dev/null
+++ b/nvblox/thirdparty/eigen/eigen.cmake
@@ -0,0 +1,29 @@
+include(ExternalProject)
+
+# If the caller wants Eigen installed somewhere, do so; otherwise neither
+# configure nor install (Eigen is header-only).
+if(EIGEN_INCLUDE_DESTINATION)
+  set(EIGEN_INCLUDE_DIRS "${EIGEN_INCLUDE_DESTINATION}/include/eigen3")
+  file(MAKE_DIRECTORY ${EIGEN_INCLUDE_DIRS})
+  set(EIGEN_CONFIGURE_CMD)
+  set(EIGEN_INSTALL_CMD $(MAKE) install)
+else()
+  set(EIGEN_CONFIGURE_CMD echo 'not installing eigen')
+  set(EIGEN_INSTALL_CMD echo 'not installing eigen')
+endif()
+
+ExternalProject_Add(
+  ext_eigen
+  PREFIX eigen
+  URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz
+  URL_HASH SHA256=8586084f71f9bde545ee7fa6d00288b264a2b7ac3607b974e54d13e7162c1c72
+  UPDATE_COMMAND ""
+  CONFIGURE_COMMAND ${EIGEN_CONFIGURE_CMD}
+  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${EIGEN_INCLUDE_DESTINATION}
+  BUILD_COMMAND ""
+  INSTALL_COMMAND ${EIGEN_INSTALL_CMD}
+)
+
+if(NOT EIGEN_INCLUDE_DESTINATION)
+  ExternalProject_Get_Property(ext_eigen SOURCE_DIR)
+  set(EIGEN_INCLUDE_DIRS ${SOURCE_DIR})
+endif()
diff --git a/nvblox/thirdparty/stdgpu/stdgpu.cmake b/nvblox/thirdparty/stdgpu/stdgpu.cmake
new file mode 100644
index 00000000..c87c941d
--- /dev/null
+++ b/nvblox/thirdparty/stdgpu/stdgpu.cmake
@@ -0,0 +1,52 @@
+include(FetchContent)
+FetchContent_Declare(
+  ext_stdgpu
+  PREFIX stdgpu
+  GIT_REPOSITORY https://github.com/stotko/stdgpu.git
+  GIT_TAG e10f6f3ccc9902d693af4380c3bcd188ec34a2e6
+  UPDATE_COMMAND ""
+)
+
+# stdgpu build options.
+set(STDGPU_BUILD_SHARED_LIBS ON)
+set(STDGPU_BUILD_EXAMPLES OFF)
+set(STDGPU_BUILD_TESTS OFF)
+set(STDGPU_ENABLE_CONTRACT_CHECKS OFF)
+
+set(STDGPU_BACKEND_DIRECTORY "cuda")  # Do we need this?
+
+# Download the files.
+FetchContent_MakeAvailable(ext_stdgpu)
+
+# Variables we need in the parent scope.
+set(STDGPU_INCLUDE_DIRS ${STDGPU_INSTALL_DESTINATION}/include/)  # The trailing "/" is critical.
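+# stdgpu_SOURCE_DIR is populated by FetchContent_MakeAvailable() above;
+# FetchContent defines <lowercase-name>_SOURCE_DIR for each dependency.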
+set(STDGPU_SRC_DIR ${stdgpu_SOURCE_DIR})
+
+# Grab the compute capability through functions defined by stdgpu:
+# https://github.com/stotko/stdgpu/tree/master/cmake/cuda
+
+set(CMAKE_MODULE_PATH_OLD ${CMAKE_MODULE_PATH})
+# Temporarily replace CMAKE_MODULE_PATH.
+set(CMAKE_MODULE_PATH "${STDGPU_SRC_DIR}/cmake/cuda")
+
+include("${STDGPU_SRC_DIR}/cmake/cuda/set_device_flags.cmake")
+
+stdgpu_set_device_flags(STDGPU_DEVICE_FLAGS)
+stdgpu_cuda_set_architecture_flags(STDGPU_CUDA_ARCHITECTURE_FLAGS)
+if(STDGPU_CUDA_ARCHITECTURE_FLAGS)
+  if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
+    set(CMAKE_CUDA_ARCHITECTURES ${STDGPU_CUDA_ARCHITECTURE_FLAGS})
+  else()
+    string(APPEND CMAKE_CUDA_FLAGS "${STDGPU_CUDA_ARCHITECTURE_FLAGS}")
+    message(STATUS "Building with modified CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}")
+  endif()
+else()
+  message(WARNING "Falling back to the default compute capabilities: ${CMAKE_CUDA_ARCHITECTURES}")
+endif()
+
+# Restore CMAKE_MODULE_PATH.
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH_OLD})
+
diff --git a/nvblox/visualization/visualize_mesh.py b/nvblox/visualization/visualize_mesh.py
new file mode 100755
index 00000000..fd09d9ff
--- /dev/null
+++ b/nvblox/visualization/visualize_mesh.py
@@ -0,0 +1,41 @@
+#!/usr/bin/python3
+
+import argparse
+
+import open3d as o3d
+
+
+def visualize_ply(ply_path: str, do_normal_coloring: bool):
+    # Load the mesh and compute normals for shading.
+    mesh = o3d.io.read_triangle_mesh(ply_path)
+    print(mesh)
+    mesh.compute_vertex_normals()
+
+    # Create a window and show the mesh.
+    vis = o3d.visualization.Visualizer()
+    vis.create_window()
+    vis.add_geometry(mesh)
+    # Optionally color the mesh by its normals.
+    if do_normal_coloring:
+        vis.get_render_option().mesh_color_option = \
+            o3d.visualization.MeshColorOption.Normal
+    vis.run()
+    vis.destroy_window()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Visualize a PLY mesh.")
+    parser.add_argument("path", metavar="path", type=str,
+                        help="Path to the .ply mesh file to visualize.")
+    parser.add_argument("--normal_coloring", dest="do_normal_coloring",
+                        action="store_true",
+                        help="Color the mesh by vertex normals instead of "
+                             "vertex colors.")
+
+    args = parser.parse_args()
+    if args.path:
+        visualize_ply(args.path, args.do_normal_coloring)
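+
+# Example usage (the mesh filename is hypothetical; any .ply written by an
+# nvblox reconstruction will do):
+#   ./nvblox/visualization/visualize_mesh.py mesh.ply --normal_coloring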