diff --git a/CMakeLists.txt b/CMakeLists.txt index 772063235..a3b4487d6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -312,6 +312,7 @@ set(TARGET_CORE_SOURCES src/pipeline/node/AprilTag.cpp src/pipeline/node/ObjectTracker.cpp src/pipeline/node/IMU.cpp + src/pipeline/node/KeypointsParser.cpp src/pipeline/node/EdgeDetector.cpp src/pipeline/node/SPIIn.cpp src/pipeline/node/FeatureTracker.cpp @@ -328,6 +329,7 @@ set(TARGET_CORE_SOURCES src/pipeline/datatype/Buffer.cpp src/pipeline/datatype/ImgFrame.cpp src/pipeline/datatype/ImgTransformations.cpp + src/pipeline/datatype/Keypoints.cpp src/pipeline/datatype/EncodedFrame.cpp src/pipeline/datatype/ImageManipConfig.cpp src/pipeline/datatype/ImageManipConfigV2.cpp diff --git a/bindings/python/CMakeLists.txt b/bindings/python/CMakeLists.txt index 17cdbb3f4..324604f76 100644 --- a/bindings/python/CMakeLists.txt +++ b/bindings/python/CMakeLists.txt @@ -95,6 +95,7 @@ set(SOURCE_LIST src/pipeline/node/SpatialDetectionNetworkBindings.cpp src/pipeline/node/ObjectTrackerBindings.cpp src/pipeline/node/IMUBindings.cpp + src/pipeline/node/KeypointsParserBindings.cpp src/pipeline/node/EdgeDetectorBindings.cpp src/pipeline/node/FeatureTrackerBindings.cpp src/pipeline/node/ToFBindings.cpp @@ -126,6 +127,7 @@ set(SOURCE_LIST src/pipeline/datatype/ImgFrameBindings.cpp src/pipeline/datatype/EncodedFrameBindings.cpp src/pipeline/datatype/IMUDataBindings.cpp + src/pipeline/datatype/KeypointsBindings.cpp src/pipeline/datatype/MessageGroupBindings.cpp src/pipeline/datatype/NNDataBindings.cpp src/pipeline/datatype/SpatialImgDetectionsBindings.cpp diff --git a/bindings/python/src/DatatypeBindings.cpp b/bindings/python/src/DatatypeBindings.cpp index 0e9e8aa1e..7625b3191 100644 --- a/bindings/python/src/DatatypeBindings.cpp +++ b/bindings/python/src/DatatypeBindings.cpp @@ -17,6 +17,7 @@ void bind_imgdetections(pybind11::module& m, void* pCallstack); void bind_imgframe(pybind11::module& m, void* pCallstack); void 
bind_encodedframe(pybind11::module& m, void* pCallstack); void bind_imudata(pybind11::module& m, void* pCallstack); +void bind_keypoints(pybind11::module& m, void* pCallstack); void bind_message_group(pybind11::module& m, void* pCallstack); void bind_nndata(pybind11::module& m, void* pCallstack); void bind_spatialimgdetections(pybind11::module& m, void* pCallstack); @@ -51,6 +52,7 @@ void DatatypeBindings::addToCallstack(std::deque& callstack) { callstack.push_front(bind_imgframe); callstack.push_front(bind_encodedframe); callstack.push_front(bind_imudata); + callstack.push_front(bind_keypoints); callstack.push_front(bind_message_group); callstack.push_front(bind_nndata); callstack.push_front(bind_spatialimgdetections); @@ -96,6 +98,7 @@ void DatatypeBindings::bind(pybind11::module& m, void* pCallstack){ .value("ImageManipConfigV2", DatatypeEnum::ImageManipConfigV2) .value("CameraControl", DatatypeEnum::CameraControl) .value("ImgDetections", DatatypeEnum::ImgDetections) + .value("Keypoints", DatatypeEnum::Keypoints) .value("SpatialImgDetections", DatatypeEnum::SpatialImgDetections) .value("SystemInformation", DatatypeEnum::SystemInformation) .value("SpatialLocationCalculatorConfig", DatatypeEnum::SpatialLocationCalculatorConfig) diff --git a/bindings/python/src/pipeline/datatype/KeypointsBindings.cpp b/bindings/python/src/pipeline/datatype/KeypointsBindings.cpp new file mode 100644 index 000000000..4265050ec --- /dev/null +++ b/bindings/python/src/pipeline/datatype/KeypointsBindings.cpp @@ -0,0 +1,58 @@ +#include "DatatypeBindings.hpp" +#include "pipeline/CommonBindings.hpp" + +// depthai +#include "depthai/pipeline/datatype/Keypoints.hpp" +//pybind +#include +#include + + +void bind_keypoints(pybind11::module& m, void* pCallstack){ // Registers dai::Keypoint and dai::Keypoints on module m + + using namespace dai; + + py::class_, Buffer, std::shared_ptr> keypoints(m, "Keypoints", DOC(dai, Keypoints)); + py::class_ keypoint(m, "Keypoint", DOC(dai, Keypoint)); // classes are declared first; their members are bound after the callstack below has run + + 
/////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////// + // Call the rest of the type defines, then perform the actual bindings + Callstack* callstack = (Callstack*) pCallstack; + auto cb = callstack->top(); + callstack->pop(); + cb(m, pCallstack); + // Actual bindings + /////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////// + + // Single point struct: x, y, plus optional z and confidence + keypoint + .def(py::init<>()) + .def_readwrite("x", &Keypoint::x) + .def_readwrite("y", &Keypoint::y) + .def_readwrite("z", &Keypoint::z) + .def_readwrite("confidence", &Keypoint::confidence) + ; + + // Message + keypoints + .def(py::init<>()) + + // getters + .def("getTimestamp", &Keypoints::Buffer::getTimestamp, DOC(dai, Buffer, getTimestamp)) + .def("getTimestampDevice", &Keypoints::Buffer::getTimestampDevice, DOC(dai, Buffer, getTimestampDevice)) + .def("getSequenceNum", &Keypoints::Buffer::getSequenceNum, DOC(dai, Buffer, getSequenceNum)) + .def("getKeypoints", &Keypoints::getKeypoints, DOC(dai, Keypoints, getKeypoints)) + + // setters + .def("setTimestamp", &Keypoints::Buffer::setTimestamp, py::arg("timestamp"), DOC(dai, Buffer, setTimestamp)) + .def("setTimestampDevice", &Keypoints::Buffer::setTimestampDevice, DOC(dai, Buffer, setTimestampDevice)) + .def("setSequenceNum", &Keypoints::Buffer::setSequenceNum, DOC(dai, Buffer, setSequenceNum)) + // Binds only the overload that takes Keypoint objects; the Point2f/Point3f overloads are not exposed here + .def("setKeypoints", py::overload_cast&>(&Keypoints::setKeypoints), DOC(dai, Keypoints, setKeypoints)) + ; + +} diff --git a/bindings/python/src/pipeline/node/KeypointsParserBindings.cpp b/bindings/python/src/pipeline/node/KeypointsParserBindings.cpp new file mode 100644 index 
000000000..52890a1aa --- /dev/null +++ b/bindings/python/src/pipeline/node/KeypointsParserBindings.cpp @@ -0,0 +1,56 @@ +#include "Common.hpp" +#include "NodeBindings.hpp" + +#include "depthai/pipeline/Node.hpp" +#include "depthai/pipeline/Pipeline.hpp" +#include "depthai/properties/KeypointsParserProperties.hpp" +#include "depthai/pipeline/node/KeypointsParser.hpp" + +void bind_keypointsparser(pybind11::module& m, void* pCallstack){ // Registers the KeypointsParser node and its properties struct on module m + + using namespace dai; + using namespace dai::node; + + // Declare node and properties up front; the Python class name matches the C++ type name + py::class_ keypointsParserProperties(m, "KeypointsParserProperties", DOC(dai, KeypointsParserProperties)); + auto keypointsParser = ADD_NODE(KeypointsParser); + + /////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////// + // Call the rest of the type defines, then perform the actual bindings + Callstack* callstack = (Callstack*) pCallstack; + auto cb = callstack->top(); + callstack->pop(); + cb(m, pCallstack); + // Actual bindings + /////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////// + + // Properties + keypointsParserProperties + .def_readwrite("scaleFactor", &KeypointsParserProperties::scaleFactor) + .def_readwrite("numKeypoints", &KeypointsParserProperties::numKeypoints) + ; + + // Node + keypointsParser + .def_readonly("input", &KeypointsParser::input, DOC(dai, node, KeypointsParser, input)) + .def_readonly("out", &KeypointsParser::out, DOC(dai, node, KeypointsParser, out)) + + .def("build", &KeypointsParser::build, py::arg("nnArchive"), DOC(dai, node, KeypointsParser, build)) + + // getters + .def("runOnHost", &KeypointsParser::runOnHost, DOC(dai, node, KeypointsParser, runOnHost)) + .def("getScaleFactor", 
&KeypointsParser::getScaleFactor, DOC(dai, node, KeypointsParser, getScaleFactor)) + .def("getNumKeypoints", &KeypointsParser::getNumKeypoints, DOC(dai, node, KeypointsParser, getNumKeypoints)) + + // setters (argument names mirror the C++ declarations so they can be passed by keyword) + .def("setRunOnHost", &KeypointsParser::setRunOnHost, py::arg("runOnHost"), DOC(dai, node, KeypointsParser, setRunOnHost)) + .def("setScaleFactor", &KeypointsParser::setScaleFactor, py::arg("scaleFactor"), DOC(dai, node, KeypointsParser, setScaleFactor)) + .def("setNumKeypoints", &KeypointsParser::setNumKeypoints, py::arg("numKeypoints"), DOC(dai, node, KeypointsParser, setNumKeypoints)) + ; + daiNodeModule.attr("KeypointsParser").attr("Properties") = keypointsParserProperties; // also reachable as KeypointsParser.Properties + +} diff --git a/bindings/python/src/pipeline/node/NodeBindings.cpp b/bindings/python/src/pipeline/node/NodeBindings.cpp index 63e0bd18e..29922e4ce 100644 --- a/bindings/python/src/pipeline/node/NodeBindings.cpp +++ b/bindings/python/src/pipeline/node/NodeBindings.cpp @@ -147,6 +147,7 @@ void bind_spatiallocationcalculator(pybind11::module& m, void* pCallstack); void bind_spatialdetectionnetwork(pybind11::module& m, void* pCallstack); void bind_objecttracker(pybind11::module& m, void* pCallstack); void bind_imu(pybind11::module& m, void* pCallstack); +void bind_keypointsparser(pybind11::module& m, void* pCallstack); void bind_edgedetector(pybind11::module& m, void* pCallstack); void bind_featuretracker(pybind11::module& m, void* pCallstack); void bind_apriltag(pybind11::module& m, void* pCallstack); @@ -192,6 +193,7 @@ void NodeBindings::addToCallstack(std::deque& callstack) { callstack.push_front(bind_spatialdetectionnetwork); callstack.push_front(bind_objecttracker); callstack.push_front(bind_imu); + callstack.push_front(bind_keypointsparser); callstack.push_front(bind_edgedetector); callstack.push_front(bind_featuretracker); callstack.push_front(bind_apriltag); diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt index 7e07ca8cd..f5392c5eb 100644 --- a/examples/cpp/CMakeLists.txt +++ b/examples/cpp/CMakeLists.txt @@ -483,6 +483,10 @@ 
dai_add_example(threaded_host_node HostNodes/threaded_host_node.cpp ON OFF) # Model zoo dai_add_example(model_zoo RVC2/ModelZoo/model_zoo.cpp ON OFF) +# Keypoints parser +dai_add_example(keypoints_device RVC2/parsers/keypoints_device.cpp ON OFF) +dai_add_example(keypoints_host RVC2/parsers/keypoints_host.cpp ON OFF) + if(DEPTHAI_RTABMAP_SUPPORT) include(FetchContent) FetchContent_Declare(rerun_sdk URL https://github.com/rerun-io/rerun/releases/download/0.16.1/rerun_cpp_sdk.zip) diff --git a/examples/cpp/RVC2/parsers/keypoints_device.cpp b/examples/cpp/RVC2/parsers/keypoints_device.cpp new file mode 100644 index 000000000..993778216 --- /dev/null +++ b/examples/cpp/RVC2/parsers/keypoints_device.cpp @@ -0,0 +1,49 @@ +#include + +int main() { + dai::NNModelDescription modelDescription; + modelDescription.modelSlug = "mediapipe-face-landmarker"; + modelDescription.modelVersionSlug = "192x192"; + modelDescription.platform = "RVC2"; + std::string archivePath = dai::getModelFromZoo(modelDescription, true); + dai::NNArchive nnArchive(archivePath); + + dai::Pipeline pipeline; + + auto cam = pipeline.create()->build(); + auto largeOutput = cam->requestOutput(std::pair(720, 720), dai::ImgFrame::Type::BGR888p); + + auto manip = pipeline.create(); + manip->initialConfig.setResize(192, 192); + largeOutput->link(manip->inputImage); + + auto nn = pipeline.create()->build(manip->out, nnArchive); + + auto parser = pipeline.create()->build(nnArchive); + nn->out.link(parser->input); + + auto videoQ = largeOutput->createOutputQueue(); + auto keypointsQ = parser->out.createOutputQueue(); + + pipeline.start(); + + while(pipeline.isRunning()) { + auto frame = videoQ->get(); + auto cvFrame = frame->getCvFrame(); + auto keypoints = keypointsQ->get()->keypoints; + + for (const auto& keypoint : keypoints) { + int x = keypoint.x * frame->getWidth(); // scale by the frame size to get pixel coordinates + int y = keypoint.y * frame->getHeight(); + cv::circle(cvFrame, cv::Point(x, y), 2, cv::Scalar(0, 255, 0), -1); + } + + cv::imshow("Display", cvFrame); 
+ auto key = cv::waitKey(1); + if(key == 'q' || key == 'Q') { + break; + } + } + + return 0; +} diff --git a/examples/cpp/RVC2/parsers/keypoints_host.cpp b/examples/cpp/RVC2/parsers/keypoints_host.cpp new file mode 100644 index 000000000..c44f6ec3a --- /dev/null +++ b/examples/cpp/RVC2/parsers/keypoints_host.cpp @@ -0,0 +1,50 @@ +#include + +int main() { + dai::NNModelDescription modelDescription; + modelDescription.modelSlug = "mediapipe-face-landmarker"; + modelDescription.modelVersionSlug = "192x192"; + modelDescription.platform = "RVC2"; + std::string archivePath = dai::getModelFromZoo(modelDescription, true); + dai::NNArchive nnArchive(archivePath); + + dai::Pipeline pipeline; + + auto cam = pipeline.create()->build(); + auto largeOutput = cam->requestOutput(std::pair(720, 720), dai::ImgFrame::Type::BGR888p); + + auto manip = pipeline.create(); + manip->initialConfig.setResize(192, 192); + largeOutput->link(manip->inputImage); + + auto nn = pipeline.create()->build(manip->out, nnArchive); + + auto parser = pipeline.create()->build(nnArchive); + parser->setRunOnHost(true); + nn->out.link(parser->input); + + auto videoQ = largeOutput->createOutputQueue(); + auto keypointsQ = parser->out.createOutputQueue(); + + pipeline.start(); + + while(pipeline.isRunning()) { + auto frame = videoQ->get(); + auto cvFrame = frame->getCvFrame(); + auto keypoints = keypointsQ->get()->keypoints; + + for (const auto& keypoint : keypoints) { + int x = keypoint.x * frame->getWidth(); // scale by the frame size to get pixel coordinates + int y = keypoint.y * frame->getHeight(); + cv::circle(cvFrame, cv::Point(x, y), 2, cv::Scalar(0, 255, 0), -1); + } + + cv::imshow("Display", cvFrame); + auto key = cv::waitKey(1); + if(key == 'q' || key == 'Q') { + break; + } + } + + return 0; +} diff --git a/examples/python/RVC2/parsers/keypoints.py b/examples/python/RVC2/parsers/keypoints.py new file mode 100644 index 000000000..57da3babc --- /dev/null +++ b/examples/python/RVC2/parsers/keypoints.py @@ -0,0 +1,46 @@ +import depthai as dai + +import cv2 + 
+RUN_ON_HOST = True  # passed to parser.setRunOnHost() below + +modelDescription = dai.NNModelDescription(modelSlug="mediapipe-face-landmarker", modelVersionSlug="192x192", platform="RVC2") +archivePath = dai.getModelFromZoo(modelDescription, useCached=True) +nnArchive = dai.NNArchive(archivePath) + +with dai.Pipeline() as pipeline: + + print("Creating pipeline...") + cam = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_A) + full = cam.requestOutput((720, 720), dai.ImgFrame.Type.BGR888p) + + manip = pipeline.create(dai.node.ImageManip) + manip.initialConfig.setResize(192, 192) + full.link(manip.inputImage) + + nn = pipeline.create(dai.node.NeuralNetwork).build( + input=manip.out, + nnArchive=nnArchive + ) + + parser = pipeline.create(dai.node.KeypointsParser).build(nnArchive) + parser.setRunOnHost(RUN_ON_HOST) + nn.out.link(parser.input) + + video_q = full.createOutputQueue() + keypoints_q = parser.out.createOutputQueue() + + pipeline.start() + + while pipeline.isRunning(): + frame = video_q.get().getCvFrame() + keypoints: dai.Keypoints = keypoints_q.get() + + for keypoint in keypoints.getKeypoints(): + x, y = keypoint.x, keypoint.y + x, y = int(x * frame.shape[1]), int(y * frame.shape[0])  # scale by frame width/height to get pixel coordinates + frame = cv2.circle(frame, (x, y), 2, (0, 255, 0), -1) + + cv2.imshow("Frame", frame) + if cv2.waitKey(1) in (ord("q"), ord("Q")):  # same quit keys as the C++ examples + break diff --git a/include/depthai/pipeline/datatype/DatatypeEnum.hpp b/include/depthai/pipeline/datatype/DatatypeEnum.hpp index d3456571b..7d2c8820c 100644 --- a/include/depthai/pipeline/datatype/DatatypeEnum.hpp +++ b/include/depthai/pipeline/datatype/DatatypeEnum.hpp @@ -24,6 +24,7 @@ enum class DatatypeEnum : std::int32_t { AprilTags, Tracklets, IMUData, + Keypoints, StereoDepthConfig, FeatureTrackerConfig, ToFConfig, diff --git a/include/depthai/pipeline/datatype/Keypoints.hpp b/include/depthai/pipeline/datatype/Keypoints.hpp new file mode 100644 index 000000000..9ce227f25 --- /dev/null +++ b/include/depthai/pipeline/datatype/Keypoints.hpp @@ -0,0 +1,79 @@ +#pragma 
once + +#include +#include + +// project +#include "depthai/pipeline/datatype/Buffer.hpp" + +// shared +#include "depthai/common/Point2f.hpp" +#include "depthai/common/Point3f.hpp" +#include "depthai/common/optional.hpp" + +namespace dai { + +struct Keypoint { + float x = 0.f; + float y = 0.f; + std::optional z; // left unset by the 2D setKeypoints overloads + std::optional confidence; // left unset by the overloads that take no scores +}; + +DEPTHAI_SERIALIZE_EXT(Keypoint, x, y, z, confidence); + +/** + * Keypoints message. Carries keypoints data. + */ +class Keypoints : public Buffer { + public: + /** + * Construct Keypoints message. + */ + Keypoints() = default; + ~Keypoints() override = default; + + /// Keypoints + std::vector keypoints; + + void serialize(std::vector& metadata, DatatypeEnum& datatype) const override { + metadata = utility::serialize(*this); + datatype = DatatypeEnum::Keypoints; + } + + // getters + const std::vector& getKeypoints() const; + + // setters + Keypoints& setKeypoints(const std::vector& keypoints); + + /** + * From 3D points + * + * @param points detected 3D keypoints + * @param scores confidence scores for each keypoint (same length as points, each within [0, 1]) + * @param confidenceThreshold confidence threshold within [0, 1], filters out keypoints with confidence below threshold + * + * @returns keypoints message + */ + Keypoints& setKeypoints(const std::vector& points); + Keypoints& setKeypoints(const std::vector& points, const std::vector& scores); + Keypoints& setKeypoints(const std::vector& points, const std::vector& scores, float confidenceThreshold); + + /** + * From 2D points + * + * @param points detected 2D keypoints + * @param scores confidence scores for each keypoint (same length as points, each within [0, 1]) + * @param confidenceThreshold confidence threshold within [0, 1], filters out keypoints with confidence below threshold + * + * @returns keypoints message + */ + Keypoints& setKeypoints(const std::vector& points); + Keypoints& setKeypoints(const std::vector& points, const std::vector& scores); + Keypoints& setKeypoints(const std::vector& points, const std::vector& scores, float confidenceThreshold); + + 
DEPTHAI_SERIALIZE(Keypoints, Buffer::sequenceNum, Buffer::ts, Buffer::tsDevice, keypoints); +}; + +} // namespace dai diff --git a/include/depthai/pipeline/datatype/NNData.hpp b/include/depthai/pipeline/datatype/NNData.hpp index 4a7aecfed..b9f0961ad 100644 --- a/include/depthai/pipeline/datatype/NNData.hpp +++ b/include/depthai/pipeline/datatype/NNData.hpp @@ -494,18 +494,6 @@ class NNData : public Buffer { } } - /** - * Get the datatype of a given tensor - * @returns TensorInfo::DataType tensor datatype - */ - TensorInfo::DataType getTensorDatatype(const std::string& name); - - /** - * Get the datatype of the first tensor - * @returns TensorInfo::DataType tensor datatype - */ - TensorInfo::DataType getFirstTensorDatatype(); - /** * Convenience function to retrieve values from the first tensor * @returns xt::xarray<_Ty> tensor @@ -519,6 +507,18 @@ class NNData : public Buffer { return {}; } #endif + /** + * Get the datatype of a given tensor + * @returns TensorInfo::DataType tensor datatype + */ + TensorInfo::DataType getTensorDatatype(const std::string& name); + + /** + * Get the datatype of the first tensor + * @returns TensorInfo::DataType tensor datatype + */ + TensorInfo::DataType getFirstTensorDatatype(); + void serialize(std::vector& metadata, DatatypeEnum& datatype) const override { metadata = utility::serialize(*this); datatype = DatatypeEnum::NNData; diff --git a/include/depthai/pipeline/datatypes.hpp b/include/depthai/pipeline/datatypes.hpp index 35285d045..e8e2408c2 100644 --- a/include/depthai/pipeline/datatypes.hpp +++ b/include/depthai/pipeline/datatypes.hpp @@ -12,6 +12,7 @@ #include "datatype/EncodedFrame.hpp" #include "datatype/FeatureTrackerConfig.hpp" #include "datatype/IMUData.hpp" +#include "datatype/Keypoints.hpp" #include "datatype/ImageManipConfig.hpp" #include "datatype/ImageManipConfigV2.hpp" #include "datatype/ImgDetections.hpp" diff --git a/include/depthai/pipeline/node/KeypointsParser.hpp 
b/include/depthai/pipeline/node/KeypointsParser.hpp new file mode 100644 index 000000000..d46af0564 --- /dev/null +++ b/include/depthai/pipeline/node/KeypointsParser.hpp @@ -0,0 +1,103 @@ +#pragma once + +// project +#include "depthai/pipeline/DeviceNode.hpp" +#include "depthai/pipeline/datatype/NNData.hpp" +#include "depthai/properties/KeypointsParserProperties.hpp" +#include "depthai/pipeline/datatype/Keypoints.hpp" + +// shared +#include "depthai/common/Point2f.hpp" +#include "depthai/common/Point3f.hpp" +#include "depthai/nn_archive/NNArchive.hpp" + +#if defined(__clang__) + #if __has_warning("-Wswitch-enum") + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wswitch-enum" + #endif +#elif defined(__GNUC__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wswitch-enum" +#elif defined(_MSC_VER) + #pragma warning(push) + #pragma warning(disable : 4061) +#endif +#ifdef DEPTHAI_XTENSOR_SUPPORT + #include "xtensor/xarray.hpp" + #include "xtensor/xmath.hpp" +#endif +#if defined(__clang__) + #if __has_warning("-Wswitch-enum") + #pragma clang diagnostic pop + #endif +#elif defined(__GNUC__) + #pragma GCC diagnostic pop +#elif defined(_MSC_VER) + #pragma warning(pop) +#endif + +namespace dai { +namespace node { + +/** + * @brief Keypoint detections parser node. Turns the single output tensor of an NNData message into a Keypoints message. + */ +class KeypointsParser : public DeviceNodeCRTP, public HostRunnable { + private: + bool isBuilt = false; // set by build(); a second build() call throws + bool runOnHostVar = false; // backing field for setRunOnHost() / runOnHost() + + public: + constexpr static const char* NAME = "KeypointsParser"; + using DeviceNodeCRTP::DeviceNodeCRTP; + + /** + * Input message with keypoint data from the neural network. + */ + Input input{*this, {"input", DEFAULT_GROUP, DEFAULT_BLOCKING, DEFAULT_QUEUE_SIZE, {{{DatatypeEnum::NNData, true}}}, DEFAULT_WAIT_FOR_MESSAGE}}; + + /** + * Outputs Keypoints message that carries the detected keypoints. 
+ */ + Output out{*this, {"out", DEFAULT_GROUP, {{{DatatypeEnum::Keypoints, false}}}}}; + + /** + * Extract scale factor and number of keypoints from the NNArchive. Reads `n_keypoints` and `scale_factor` from the first head's extra params when present; throws std::runtime_error if the node was already built. + */ + std::shared_ptr build(const NNArchive& nnArchive); + + /** + * Set the scale factor to divide the keypoints by. + */ + void setScaleFactor(float scaleFactor); + /** + * Get the scale factor to divide the keypoints by. + */ + float getScaleFactor() const; + + /** + * Set the number of keypoints expected in each NNData message; run() throws if it is left unset (-1). + */ + void setNumKeypoints(int numKeypoints); + /** + * Get the number of keypoints. + */ + int getNumKeypoints() const; + + /** + * Specify whether to run on host or device + * By default, the node will run on device. + */ + void setRunOnHost(bool runOnHost); + /** + * Check if the node is set to run on host + */ + bool runOnHost() const override; + + void run() override; // host implementation; throws std::runtime_error when built without DEPTHAI_XTENSOR_SUPPORT + +}; + +} // namespace node +} // namespace dai diff --git a/include/depthai/pipeline/nodes.hpp b/include/depthai/pipeline/nodes.hpp index c499ff199..16747c230 100644 --- a/include/depthai/pipeline/nodes.hpp +++ b/include/depthai/pipeline/nodes.hpp @@ -13,6 +13,7 @@ #include "node/IMU.hpp" #include "node/ImageManip.hpp" #include "node/ImageManipV2.hpp" +#include "node/KeypointsParser.hpp" #include "node/MessageDemux.hpp" #include "node/MonoCamera.hpp" #include "node/NeuralNetwork.hpp" diff --git a/include/depthai/properties/KeypointsParserProperties.hpp b/include/depthai/properties/KeypointsParserProperties.hpp new file mode 100644 index 000000000..7a02bb2a3 --- /dev/null +++ b/include/depthai/properties/KeypointsParserProperties.hpp @@ -0,0 +1,24 @@ +#pragma once + +#include "depthai/properties/Properties.hpp" + +namespace dai { + +/** + * Specify properties for KeypointsParser. + */ +struct KeypointsParserProperties : PropertiesSerializable { + /** + * The scale factor to divide the keypoints by. + */ + float scaleFactor = 1; + + /** + * The number of keypoints. Default value is invalid, must be specified. 
+ */ + int numKeypoints = -1; +}; + +DEPTHAI_SERIALIZE_EXT(KeypointsParserProperties, scaleFactor, numKeypoints); + +} // namespace dai diff --git a/src/pipeline/datatype/DatatypeEnum.cpp b/src/pipeline/datatype/DatatypeEnum.cpp index d31e46dc1..801923a57 100644 --- a/src/pipeline/datatype/DatatypeEnum.cpp +++ b/src/pipeline/datatype/DatatypeEnum.cpp @@ -27,6 +27,7 @@ const std::unordered_map> hierarchy = { DatatypeEnum::EdgeDetectorConfig, DatatypeEnum::Tracklets, DatatypeEnum::IMUData, + DatatypeEnum::Keypoints, DatatypeEnum::StereoDepthConfig, DatatypeEnum::FeatureTrackerConfig, DatatypeEnum::ToFConfig, @@ -56,6 +57,7 @@ const std::unordered_map> hierarchy = { DatatypeEnum::EdgeDetectorConfig, DatatypeEnum::Tracklets, DatatypeEnum::IMUData, + DatatypeEnum::Keypoints, DatatypeEnum::StereoDepthConfig, DatatypeEnum::FeatureTrackerConfig, DatatypeEnum::ToFConfig, @@ -83,6 +85,7 @@ const std::unordered_map> hierarchy = { {DatatypeEnum::EdgeDetectorConfig, {}}, {DatatypeEnum::Tracklets, {}}, {DatatypeEnum::IMUData, {}}, + {DatatypeEnum::Keypoints, {}}, {DatatypeEnum::StereoDepthConfig, {}}, {DatatypeEnum::FeatureTrackerConfig, {}}, {DatatypeEnum::ToFConfig, {}}, diff --git a/src/pipeline/datatype/Keypoints.cpp b/src/pipeline/datatype/Keypoints.cpp new file mode 100644 index 000000000..623188133 --- /dev/null +++ b/src/pipeline/datatype/Keypoints.cpp @@ -0,0 +1,137 @@ +#include "depthai/pipeline/datatype/Keypoints.hpp" + +namespace dai { + +const std::vector& Keypoints::getKeypoints() const { + return keypoints; +} + +Keypoints& Keypoints::setKeypoints(const std::vector& keypoints) { + this->keypoints = keypoints; // parameter shadows the member, hence the explicit this-> + + return *this; +} + +// 3D keypoints: copies x/y/z, leaves confidence unset +Keypoints& Keypoints::setKeypoints(const std::vector& points) { + std::vector converted = std::vector(points.size()); + for (size_t i = 0; i < points.size(); i++) { + converted[i].x = points[i].x; + converted[i].y = points[i].y; + converted[i].z = points[i].z; + } + + return this->setKeypoints(converted); +} 
+Keypoints& Keypoints::setKeypoints(const std::vector& points, const std::vector& scores) { + if (points.size() != scores.size()) { + throw std::invalid_argument("Keypoints and scores should have the same length. Got " + + std::to_string(points.size()) + + " keypoints and " + + std::to_string(scores.size()) + + " scores."); + } + + for (const auto& score: scores) { + if (!(score >= 0 && score <= 1)) { // negated range test so that NaN is rejected as well + throw std::invalid_argument("Scores should only contain values between 0 and 1."); + } + } + + std::vector keypoints = std::vector(points.size()); + for (size_t i = 0; i < points.size(); i++) { + keypoints[i].x = points[i].x; + keypoints[i].y = points[i].y; + keypoints[i].z = points[i].z; + keypoints[i].confidence = scores[i]; + } + + return this->setKeypoints(keypoints); +} +Keypoints& Keypoints::setKeypoints(const std::vector& points, const std::vector& scores, float confidenceThreshold) { + if (points.size() != scores.size()) { + throw std::invalid_argument("Keypoints and scores should have the same length. Got " + + std::to_string(points.size()) + + " keypoints and " + + std::to_string(scores.size()) + + " scores."); + } + + if (!(confidenceThreshold >= 0 && confidenceThreshold <= 1)) { // also rejects a NaN threshold, which would silently drop every point + throw std::invalid_argument("Confidence threshold should be between 0 and 1. 
Got " + std::to_string(confidenceThreshold) + "."); + } + + std::vector filteredPoints = std::vector(); + std::vector filteredScores = std::vector(); + + for (size_t i = 0; i < points.size(); i++) { + if (scores[i] >= confidenceThreshold) { + filteredPoints.push_back(points[i]); + filteredScores.push_back(scores[i]); + } + } + + return this->setKeypoints(filteredPoints, filteredScores); // remaining scores are range-checked by the two-argument overload +} + +// 2D keypoints: z is left unset +Keypoints& Keypoints::setKeypoints(const std::vector& points) { + std::vector keypoints = std::vector(points.size()); + for (size_t i = 0; i < points.size(); i++) { + keypoints[i].x = points[i].x; + keypoints[i].y = points[i].y; + } + + return this->setKeypoints(keypoints); +} +Keypoints& Keypoints::setKeypoints(const std::vector& points, const std::vector& scores) { + if (points.size() != scores.size()) { + throw std::invalid_argument("Keypoints and scores should have the same length. Got " + + std::to_string(points.size()) + + " keypoints and " + + std::to_string(scores.size()) + + " scores."); + } + + for (const auto& score: scores) { + if (!(score >= 0 && score <= 1)) { // negated range test so that NaN is rejected as well + throw std::invalid_argument("Scores should only contain values between 0 and 1."); + } + } + + std::vector keypoints = std::vector(points.size()); + for (size_t i = 0; i < points.size(); i++) { + keypoints[i].x = points[i].x; + keypoints[i].y = points[i].y; + keypoints[i].confidence = scores[i]; + } + + return this->setKeypoints(keypoints); +} +Keypoints& Keypoints::setKeypoints(const std::vector& points, const std::vector& scores, float confidenceThreshold) { + if (points.size() != scores.size()) { + throw std::invalid_argument("Keypoints and scores should have the same length. Got " + + std::to_string(points.size()) + + " keypoints and " + + std::to_string(scores.size()) + + " scores."); + } + + if (!(confidenceThreshold >= 0 && confidenceThreshold <= 1)) { // also rejects a NaN threshold, which would silently drop every point + throw std::invalid_argument("Confidence threshold should be between 0 and 1. 
Got " + std::to_string(confidenceThreshold) + "."); + } + + std::vector filteredPoints = std::vector(); + std::vector filteredScores = std::vector(); + + for (size_t i = 0; i < points.size(); i++) { + if (scores[i] >= confidenceThreshold) { + filteredPoints.push_back(points[i]); + filteredScores.push_back(scores[i]); + } + } + + return this->setKeypoints(filteredPoints, filteredScores); // remaining scores are range-checked by the two-argument overload +} + +} // namespace dai diff --git a/src/pipeline/node/KeypointsParser.cpp b/src/pipeline/node/KeypointsParser.cpp new file mode 100644 index 000000000..5dbbe10ba --- /dev/null +++ b/src/pipeline/node/KeypointsParser.cpp @@ -0,0 +1,113 @@ +#include "depthai/pipeline/node/KeypointsParser.hpp" + +namespace dai { +namespace node { + +std::shared_ptr KeypointsParser::build(const NNArchive& nnArchive) { + if(isBuilt) { + throw std::runtime_error("KeypointsParser node is already built"); + } + + if (nnArchive.getConfig().getConfigV1().has_value() + && nnArchive.getConfig().getConfigV1().value().model.heads.has_value() + && !nnArchive.getConfig().getConfigV1().value().model.heads.value().empty()) { + nlohmann::json metadata = nnArchive.getConfig().getConfigV1().value().model.heads.value()[0].metadata.extraParams; // only the first head is consulted + + if (metadata.contains("n_keypoints")) { + setNumKeypoints(metadata["n_keypoints"]); + } + if (metadata.contains("scale_factor")) { + setScaleFactor(metadata["scale_factor"]); + } + } + + isBuilt = true; + return std::static_pointer_cast(shared_from_this()); +} + +void KeypointsParser::setScaleFactor(float scaleFactor) { + properties.scaleFactor = scaleFactor; +} +float KeypointsParser::getScaleFactor() const { + return properties.scaleFactor; +} + +void KeypointsParser::setNumKeypoints(int numKeypoints) { + properties.numKeypoints = numKeypoints; +} +int KeypointsParser::getNumKeypoints() const { + return properties.numKeypoints; +} + +void KeypointsParser::setRunOnHost(bool runOnHost) { + runOnHostVar = runOnHost; +} +bool KeypointsParser::runOnHost() const { + return 
runOnHostVar; +} + +#ifdef DEPTHAI_XTENSOR_SUPPORT +void KeypointsParser::run() { + auto numKeypoints = properties.numKeypoints; + auto scaleFactor = properties.scaleFactor; + + if(numKeypoints <= 0) { // -1 is the unset default; 0 would reach the integer division below on an empty tensor + throw std::runtime_error("Number of keypoints must be specified!"); + } + + while(isRunning()) { + auto inputData = input.get(); + if(inputData == nullptr) { + throw std::invalid_argument("Received nullptr from input"); + } + + std::vector outputLayerNames = inputData->getAllLayerNames(); + if (outputLayerNames.size() != 1) { + throw std::invalid_argument("Expected 1 output layer, got " + std::to_string(outputLayerNames.size())); + } + + xt::xarray keypointsData = inputData->getFirstTensor(true); + int totalCoords = std::accumulate(keypointsData.shape().begin(), keypointsData.shape().end(), 1, std::multiplies()); // element count = product of all dimensions + + if (numKeypoints * 2 != totalCoords && numKeypoints * 3 != totalCoords) { + throw std::runtime_error("Expected 2 or 3 coordinates per keypoint, got " + std::to_string(static_cast(totalCoords) / static_cast(numKeypoints))); + } + int pointDimension = totalCoords / numKeypoints; // 2 or 3, guaranteed by the check above + + keypointsData = keypointsData.reshape({numKeypoints, pointDimension}); + keypointsData /= scaleFactor; + + std::shared_ptr msg = std::make_shared(); + + if (pointDimension == 2) { + std::vector keypoints = std::vector(numKeypoints); + for (int i = 0; i < numKeypoints; i++) { + keypoints[i].x = keypointsData(i, 0); + keypoints[i].y = keypointsData(i, 1); + } + msg->setKeypoints(keypoints); + } + else { + std::vector keypoints = std::vector(numKeypoints); + for (int i = 0; i < numKeypoints; i++) { + keypoints[i].x = keypointsData(i, 0); + keypoints[i].y = keypointsData(i, 1); + keypoints[i].z = keypointsData(i, 2); + } + msg->setKeypoints(keypoints); + } + + msg->setTimestamp(inputData->getTimestamp()); // carry timing and sequence metadata over from the input message + msg->setTimestampDevice(inputData->getTimestampDevice()); + msg->setSequenceNum(inputData->getSequenceNum()); + out.send(msg); + } +} +#else +void KeypointsParser::run() { + 
throw std::runtime_error("KeypointsParser node requires xtensor support"); // built without DEPTHAI_XTENSOR_SUPPORT +} +#endif + +} // namespace node +} // namespace dai