diff --git a/c++/mpi/array.hpp b/c++/mpi/array.hpp index af6a3f69..cd4a7b5a 100644 --- a/c++/mpi/array.hpp +++ b/c++/mpi/array.hpp @@ -16,7 +16,7 @@ /** * @file - * @brief Provides an MPI broadcast, reduce, scatter and gather for std::vector. + * @brief Provides an MPI broadcast and reduce for `std::array`. */ #pragma once @@ -29,6 +29,8 @@ #include #include +#include +#include namespace mpi { @@ -38,55 +40,65 @@ namespace mpi { */ /** - * @brief Implementation of an MPI broadcast for a std::arr. + * @brief Implementation of an MPI broadcast for a `std::array`. * - * @details It simply calls mpi::broadcast_range with the input array. + * @details It calls mpi::broadcast_range with the given array. * * @tparam T Value type of the array. * @tparam N Size of the array. - * @param arr std::array to broadcast. + * @param arr `std::array` to broadcast (into). * @param c mpi::communicator. * @param root Rank of the root process. */ template void mpi_broadcast(std::array &arr, communicator c = {}, int root = 0) { broadcast_range(arr, c, root); } /** - * @brief Implementation of an in-place MPI reduce for a std::array. + * @brief Implementation of an MPI reduce for a `std::array`. * - * @details It simply calls mpi::reduce_in_place_range with the given input array. + * @details It constructs the output array with its value type equal to the return type of `reduce(std::declval())` + * and calls mpi::reduce_range with the input and constructed output array. + * + * Note that the output array will always have the same size as the input array, no matter if the rank receives the + * reduced data or not. * * @tparam T Value type of the array. * @tparam N Size of the array. - * @param arr std::array to reduce. + * @param arr `std::array` to reduce. * @param c mpi::communicator. * @param root Rank of the root process. * @param all Should all processes receive the result of the reduction. * @param op `MPI_Op` used in the reduction. 
+ * @return `std::array` containing the result of the reduction. */ template - void mpi_reduce_in_place(std::array &arr, communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) { - reduce_in_place_range(arr, c, root, all, op); + auto mpi_reduce(std::array const &arr, communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) { + using value_t = std::remove_cvref_t()))>; + std::array res{}; + reduce_range(arr, res, c, root, all, op); + return res; } /** - * @brief Implementation of an MPI reduce for a std::array. + * @brief Implementation of an MPI reduce for a `std::array` that reduces directly into an existing output array. * - * @details It simply calls mpi::reduce_range with the given input array and an empty array of the same size. + * @details It calls mpi::reduce_range with the input and output array. The output array must be the same size as the + * input array on receiving ranks. * - * @tparam T Value type of the array. - * @tparam N Size of the array. - * @param arr std::array to reduce. + * @tparam T1 Value type of the array to be reduced. + * @tparam N1 Size of the array to be reduced. + * @tparam T2 Value type of the array to be reduced into. + * @tparam N2 Size of the array to be reduced into. + * @param arr_in `std::array` to reduce. + * @param arr_out `std::array` to reduce into. * @param c mpi::communicator. * @param root Rank of the root process. * @param all Should all processes receive the result of the reduction. * @param op `MPI_Op` used in the reduction. - * @return std::array containing the result of each individual reduction. 
*/ - template - auto mpi_reduce(std::array const &arr, communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) { - std::array, N> res{}; - reduce_range(arr, res, c, root, all, op); - return res; + template + void mpi_reduce_into(std::array const &arr_in, std::array &arr_out, communicator c = {}, int root = 0, bool all = false, + MPI_Op op = MPI_SUM) { + reduce_range(arr_in, arr_out, c, root, all, op); } /** @} */ diff --git a/c++/mpi/chunk.hpp b/c++/mpi/chunk.hpp index 2c275646..85466cd1 100644 --- a/c++/mpi/chunk.hpp +++ b/c++/mpi/chunk.hpp @@ -22,11 +22,11 @@ #pragma once #include "./communicator.hpp" +#include "./macros.hpp" #include #include -#include #include namespace mpi { @@ -39,7 +39,7 @@ namespace mpi { * @details The optional parameter `min_size` can be used to first divide the range into equal parts of size * `min_size` before distributing them as evenly as possible across the number of specified subranges. * - * It throws an exception if `min_size < 1` or if it is not a divisor of `end`. + * It is expected that `min_size > 0` and that `min_size` is a divisor of `end`. * * @param end End of the integer range `[0, end)`. * @param nranges Number of subranges. @@ -48,7 +48,7 @@ namespace mpi { * @return Length of the ith subrange. 
*/ [[nodiscard]] inline long chunk_length(long end, int nranges, int i, long min_size = 1) { - if (min_size < 1 || end % min_size != 0) throw std::runtime_error("Error in mpi::chunk_length: min_size must be a divisor of end"); + EXPECTS_WITH_MESSAGE(min_size > 0 && end % min_size == 0, "Error in mpi::chunk_length: min_size must be a divisor of end"); auto [node_begin, node_end] = itertools::chunk_range(0, end / min_size, nranges, i); return (node_end - node_begin) * min_size; } diff --git a/c++/mpi/datatypes.hpp b/c++/mpi/datatypes.hpp index 1857d752..69f3bba7 100644 --- a/c++/mpi/datatypes.hpp +++ b/c++/mpi/datatypes.hpp @@ -91,8 +91,9 @@ namespace mpi { template struct mpi_type : mpi_type {}; /** - * @brief Type trait to check if a type T has a corresponding MPI datatype, i.e. if mpi::mpi_type has been specialized. - * @tparam T Type to be checked. + * @brief Type trait to check if a type `T` has a corresponding MPI datatype, i.e. if mpi::mpi_type has been + * specialized. + * @tparam `T` Type to be checked. */ template constexpr bool has_mpi_type = false; diff --git a/c++/mpi/generic_communication.hpp b/c++/mpi/generic_communication.hpp index 244d19e1..e0d613c7 100644 --- a/c++/mpi/generic_communication.hpp +++ b/c++/mpi/generic_communication.hpp @@ -24,66 +24,59 @@ #pragma once +#include "./communicator.hpp" #include "./datatypes.hpp" -#include "./lazy.hpp" +#include "./macros.hpp" #include "./utils.hpp" #include +#include +#include +#include #include -#include #include namespace mpi { + /** + * @ingroup utilities + * @brief A concept that checks if a range type is contiguous and sized and has an MPI compatible value type. + * @tparam R Range type. + */ + template + concept MPICompatibleRange = std::ranges::contiguous_range && std::ranges::sized_range && has_mpi_type>; + /** * @addtogroup coll_comm * @{ */ - namespace detail { - - // Type trait to check if a type is a std::vector. 
- template inline constexpr bool is_std_vector = false; - - // Spezialization of is_std_vector for std::vector. - template inline constexpr bool is_std_vector> = true; - - // Convert an object of type V to an object of type T. - template T convert(V v) { - if constexpr (is_std_vector) { - T res; - res.reserve(v.size()); - for (auto &x : v) res.emplace_back(convert(std::move(x))); - return res; - } else - return T{std::move(v)}; - } - - } // namespace detail - /** * @brief Generic MPI broadcast. * - * @details If mpi::has_env is true, this function calls the specialized `mpi_broadcast` function for the given - * object, otherwise it does nothing. + * @details It calls the specialized `mpi_broadcast` function. + * + * @note We do not check if an MPI runtime environment is being used, i.e. if mpi::has_env is true. It is the + * responsibility of the specializations to do this check, in case they make direct calls to the MPI C library. * * @tparam T Type to be broadcasted. - * @param x Object to be broadcasted. + * @param x Object to be broadcasted (into). * @param c mpi::communicator. * @param root Rank of the root process. */ - template [[gnu::always_inline]] void broadcast(T &&x, communicator c = {}, int root = 0) { - static_assert(not std::is_const_v, "mpi::broadcast cannot be called on const objects"); - if (has_env) mpi_broadcast(std::forward(x), c, root); + template [[gnu::always_inline]] void broadcast(T &&x, communicator c = {}, int root = 0) { // NOLINT (forwarding is not needed) + mpi_broadcast(x, c, root); } /** * @brief Generic MPI reduce. * - * @details If mpi::has_env is true or if the return type of the specialized `mpi_reduce` is lazy, this function calls - * the specialized `mpi_reduce` function for the given object. Otherwise, it simply converts the input object to the - * output type `mpi_reduce` would return. + * @details If there is a specialized `mpi_reduce` for the given type, we call it. 
Otherwise, we call mpi::reduce_into + * with the given input object and a default constructed output object of type `T`. + * + * @note We do not check if an MPI runtime environment is being used, i.e. if mpi::has_env is true. It is the + * responsibility of the specializations to do this check, in case they make direct calls to the MPI C library. * * @tparam T Type to be reduced. * @param x Object to be reduced. @@ -91,140 +84,253 @@ namespace mpi { * @param root Rank of the root process. * @param all Should all processes receive the result of the reduction. * @param op `MPI_Op` used in the reduction. - * @return The result of the specialized `mpi_reduce` call. + * @return Result of the specialized `mpi_reduce` call. */ template - [[gnu::always_inline]] inline decltype(auto) reduce(T &&x, communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) { - // return type of mpi_reduce - using r_t = decltype(mpi_reduce(std::forward(x), c, root, all, op)); - if constexpr (is_mpi_lazy) { - return mpi_reduce(std::forward(x), c, root, all, op); + [[gnu::always_inline]] decltype(auto) reduce(T &&x, communicator c = {}, int root = 0, bool all = false, // NOLINT (forwarding is not needed) + MPI_Op op = MPI_SUM) { + if constexpr (requires { mpi_reduce(x, c, root, all, op); }) { + return mpi_reduce(x, c, root, all, op); } else { - if (has_env) - return mpi_reduce(std::forward(x), c, root, all, op); - else - return detail::convert(std::forward(x)); + std::remove_cvref_t res; + reduce_into(x, res, c, root, all, op); + return res; } } /** - * @brief Generic in-place MPI reduce. + * @brief Generic in place MPI reduce. + * + * @details We call mpi::reduce_into with the given object as the input and output argument. * - * @details If mpi::has_env is true, this functions calls the specialized `mpi_reduce_in_place` function for the given - * object. Otherwise, it does nothing. + * @note We do not check if an MPI runtime environment is being used, i.e. 
if mpi::has_env is true. It is the + * responsibility of the specializations to do this check, in case they make direct calls to the MPI C library. * * @tparam T Type to be reduced. - * @param x Object to be reduced. + * @param x Object to be reduced (into). * @param c mpi::communicator. * @param root Rank of the root process. * @param all Should all processes receive the result of the reduction. * @param op `MPI_Op` used in the reduction. */ template - [[gnu::always_inline]] inline void reduce_in_place(T &&x, communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) { - static_assert(not std::is_const_v, "In-place mpi functions cannot be called on const objects"); - if (has_env) mpi_reduce_in_place(std::forward(x), c, root, all, op); + [[gnu::always_inline]] void reduce_in_place(T &&x, communicator c = {}, int root = 0, bool all = false, // NOLINT (forwarding is not needed) + MPI_Op op = MPI_SUM) { + mpi_reduce_into(x, x, c, root, all, op); + } + + /** + * @brief Generic MPI reduce that reduces directly into an existing output object. + * + * @details It calls the specialized `mpi_reduce_into` function. + * + * @note We do not check if an MPI runtime environment is being used, i.e. if mpi::has_env is true. It is the + * responsibility of the specializations to do this check, in case they make direct calls to the MPI C library. + * + * @tparam T1 Type to be reduced. + * @tparam T2 Type to be reduced into. + * @param x_in Object to be reduced. + * @param x_out Object to be reduced into. + * @param c mpi::communicator. + * @param root Rank of the root process. + * @param all Should all processes receive the result of the reduction. + * @param op `MPI_Op` used in the reduction. 
+ */ + template + [[gnu::always_inline]] void reduce_into(T1 &&x_in, T2 &&x_out, communicator c = {}, int root = 0, // NOLINT (forwarding is not needed) + bool all = false, MPI_Op op = MPI_SUM) { + mpi_reduce_into(x_in, x_out, c, root, all, op); } /** * @brief Generic MPI scatter. * - * @details If mpi::has_env is true or if the return type of the specialized `mpi_scatter` is lazy, this function - * calls the specialized `mpi_scatter` function for the given object. Otherwise, it simply converts the input object - * to the output type `mpi_scatter` would return. + * @details If there is a specialized `mpi_scatter` for the given type, we call it. Otherwise, we call + * mpi::scatter_into with the given input object and a default constructed output object of type `T`. + * + * @note We do not check if an MPI runtime environment is being used, i.e. if mpi::has_env is true. It is the + * responsibility of the specializations to do this check, in case they make direct calls to the MPI C library. * * @tparam T Type to be scattered. * @param x Object to be scattered. * @param c mpi::communicator. * @param root Rank of the root process. - * @return The result of the specialized `mpi_scatter` call. + * @return Result of the specialized `mpi_scatter` call. 
*/ - template [[gnu::always_inline]] inline decltype(auto) scatter(T &&x, mpi::communicator c = {}, int root = 0) { - // return type of mpi_scatter - using r_t = decltype(mpi_scatter(std::forward(x), c, root)); - if constexpr (is_mpi_lazy) { - return mpi_scatter(std::forward(x), c, root); + template + [[gnu::always_inline]] decltype(auto) scatter(T &&x, mpi::communicator c = {}, int root = 0) { // NOLINT (forwarding is not needed) + if constexpr (requires { mpi_scatter(x, c, root); }) { + return mpi_scatter(x, c, root); } else { - if (has_env) - return mpi_scatter(std::forward(x), c, root); - else - return detail::convert(std::forward(x)); + std::remove_cvref_t res; + scatter_into(x, res, c, root); + return res; } } + /** + * @brief Generic MPI scatter that scatters directly into an existing output object. + * + * @details It calls the specialized `mpi_scatter_into` function. + * + * @note We do not check if an MPI runtime environment is being used, i.e. if mpi::has_env is true. It is the + * responsibility of the specializations to do this check, in case they make direct calls to the MPI C library. + * + * @tparam T1 Type to be scattered. + * @tparam T2 Type to be scattered into. + * @param x_in Object to be scattered. + * @param x_out Object to be scattered into. + * @param c mpi::communicator. + * @param root Rank of the root process. + */ + template + [[gnu::always_inline]] void scatter_into(T1 &&x_in, T2 &&x_out, communicator c = {}, int root = 0) { // NOLINT (forwarding is not needed) + mpi_scatter_into(x_in, x_out, c, root); + } + /** * @brief Generic MPI gather. * - * @details If mpi::has_env is true or if the return type of the specialized `mpi_gather` is lazy, this function - * calls the specialized `mpi_gather` function for the given object. Otherwise, it simply converts the input object to - * the output type `mpi_gather` would return. + * @details If there is a specialized `mpi_gather` for the given type, we call it. 
Otherwise, we call mpi::gather_into + * with the given input object and a default constructed output object of type `T`. + * + * @note We do not check if an MPI runtime environment is being used, i.e. if mpi::has_env is true. It is the + * responsibility of the specializations to do this check, in case they make direct calls to the MPI C library. * * @tparam T Type to be gathered. * @param x Object to be gathered. * @param c mpi::communicator. * @param root Rank of the root process. * @param all Should all processes receive the result of the gather. - * @return The result of the specialized `mpi_gather` call. + * @return Result of the specialized `mpi_gather` call. */ - template [[gnu::always_inline]] inline decltype(auto) gather(T &&x, mpi::communicator c = {}, int root = 0, bool all = false) { - // return type of mpi_gather - using r_t = decltype(mpi_gather(std::forward(x), c, root, all)); - if constexpr (is_mpi_lazy) { - return mpi_gather(std::forward(x), c, root, all); + template + [[gnu::always_inline]] decltype(auto) gather(T &&x, communicator c = {}, int root = 0, bool all = false) { // NOLINT (forwarding is not needed) + if constexpr (requires { mpi_gather(x, c, root, all); }) { + return mpi_gather(x, c, root, all); } else { - if (has_env) - return mpi_gather(std::forward(x), c, root, all); - else - return detail::convert(std::forward(x)); + std::remove_cvref_t res; + gather_into(x, res, c, root, all); + return res; } } + /** + * @brief Generic MPI gather that gathers directly into an existing output object. + * + * @details It calls the specialized `mpi_gather_into` function. + * + * @note We do not check if an MPI runtime environment is being used, i.e. if mpi::has_env is true. It is the + * responsibility of the specializations to do this check, in case they make direct calls to the MPI C library. + * + * @tparam T1 Type to be gathered. + * @tparam T2 Type to be gathered into. + * @param x_in Object to be gathered. 
+ * @param x_out Object to be gathered into. + * @param c mpi::communicator. + * @param root Rank of the root process. + * @param all Should all processes receive the result of the gather. + */ + template + [[gnu::always_inline]] void gather_into(T1 &&x_in, T2 &&x_out, communicator c = {}, int root = 0, // NOLINT (forwarding is not needed) + bool all = false) { + mpi_gather_into(x_in, x_out, c, root, all); + } + /** * @brief Generic MPI all-reduce. * @details It simply calls mpi::reduce with `all = true`. */ - template [[gnu::always_inline]] inline decltype(auto) all_reduce(T &&x, communicator c = {}, MPI_Op op = MPI_SUM) { - return reduce(std::forward(x), c, 0, true, op); + template + [[gnu::always_inline]] decltype(auto) all_reduce(T &&x, communicator c = {}, MPI_Op op = MPI_SUM) { // NOLINT (forwarding is not needed) + return reduce(x, c, 0, true, op); } /** - * @brief Generic MPI all-reduce in-place. + * @brief Generic MPI all-reduce in place. * @details It simply calls mpi::reduce_in_place with `all = true`. */ - template [[gnu::always_inline]] inline void all_reduce_in_place(T &&x, communicator c = {}, MPI_Op op = MPI_SUM) { - reduce_in_place(std::forward(x), c, 0, true, op); + template + [[gnu::always_inline]] void all_reduce_in_place(T &&x, communicator c = {}, MPI_Op op = MPI_SUM) { // NOLINT (forwarding is not needed) + reduce_in_place(x, c, 0, true, op); + } + + /** + * @brief Generic MPI all-reduce that reduces directly into an existing output object. + * @details It simply calls mpi::reduce_into with `all = true`. + */ + template + [[gnu::always_inline]] void all_reduce_into(T1 &&x_in, T2 &&x_out, communicator c = {}, MPI_Op op = MPI_SUM) { // NOLINT (forwarding is not needed) + return reduce_into(x_in, x_out, c, 0, true, op); } /** * @brief Generic MPI all-gather. * @details It simply calls mpi::gather with `all = true`. 
*/ - template [[gnu::always_inline]] inline decltype(auto) all_gather(T &&x, communicator c = {}) { - return gather(std::forward(x), c, 0, true); + template [[gnu::always_inline]] decltype(auto) all_gather(T &&x, communicator c = {}) { // NOLINT (forwarding is not needed) + return gather(x, c, 0, true); + } + + /** + * @brief Generic MPI all-gather that gathers directly into an existing output object. + * @details It simply calls mpi::gather_into with `all = true`. + */ + template + [[gnu::always_inline]] void all_gather_into(T1 &&x_in, T2 &&x_out, communicator c = {}) { // NOLINT (forwarding is not needed) + return gather_into(x_in, x_out, c, 0, true); + } + + /** + * @brief Checks if a given object is equal across all ranks in the given communicator. + * + * @details It makes two calls to mpi::all_reduce, one with `MPI_MIN` and the other with `MPI_MAX`, and compares their + * results. + * + * @note `MPI_MIN` and `MPI_MAX` need to make sense for the given type `T`. + * + * @tparam T Type to be checked. + * @param x Object to be equality compared. + * @param c mpi::communicator. + * @return If the given object is equal on all ranks, it returns true. Otherwise, it returns false. + */ + template bool all_equal(T const &x, communicator c = {}) { + if (!has_env || c.size() < 2) return true; + auto min_obj = all_reduce(x, c, MPI_MIN); + auto max_obj = all_reduce(x, c, MPI_MAX); + return min_obj == max_obj; } /** - * @brief Implementation of an MPI broadcast for types that have a corresponding MPI datatype, i.e. for which a - * specialization of mpi::mpi_type has been defined. + * @brief Implementation of an MPI broadcast for types that have a corresponding MPI datatype. + * + * @details If mpi::has_env is false or if the communicator size is < 2, it does nothing. Otherwise, it calls + * `MPI_Bcast`. * - * @details It throws an exception in case a call to the MPI C library fails. + * It throws an exception in case the call to the MPI C library fails. 
* * @tparam T Type to be broadcasted. - * @param x Object to be broadcasted. + * @param x Object to be broadcasted (into). * @param c mpi::communicator. * @param root Rank of the root process. */ template requires(has_mpi_type) void mpi_broadcast(T &x, communicator c = {}, int root = 0) { + // in case there is no active MPI environment or if the communicator size is < 2, do nothing + if (!has_env || c.size() < 2) return; + + // make the MPI C library call check_mpi_call(MPI_Bcast(&x, 1, mpi_type::get(), root, c.get()), "MPI_Bcast"); } /** - * @brief Implementation of an MPI reduce for types that have a corresponding MPI datatype, i.e. for which a - * specialization of mpi::mpi_type has been defined. + * @brief Implementation of an MPI reduce for types that have a corresponding MPI datatype. + * + * @details If mpi::has_env is false or if the communicator size is < 2, it returns a copy of the input object. + * Otherwise, it calls `MPI_Allreduce` or `MPI_Reduce` with a default constructed output object. * - * @details It throws an exception in case a call to the MPI C library fails. + * It throws an exception in case the call to the MPI C library fails. * * @tparam T Type to be reduced. * @param x Object to be reduced. @@ -232,29 +338,39 @@ namespace mpi { * @param root Rank of the root process. * @param all Should all processes receive the result of the reduction. * @param op `MPI_Op` used in the reduction. - * @return The result of the reduction. + * @return Result of the reduction. 
*/ template requires(has_mpi_type) T mpi_reduce(T const &x, communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) { - T b; - auto d = mpi_type::get(); - if (!all) - // old MPI implementations may require a non-const send buffer - check_mpi_call(MPI_Reduce(const_cast(&x), &b, 1, d, op, root, c.get()), "MPI_Reduce"); // NOLINT - else - check_mpi_call(MPI_Allreduce(const_cast(&x), &b, 1, d, op, c.get()), "MPI_Allreduce"); // NOLINT - return b; + // in case there is no active MPI environment or if the communicator size is < 2, return the input object + if (!has_env || c.size() < 2) return x; + + // make the MPI C library call with a default constructed output object + T res; + if (all) { + check_mpi_call(MPI_Allreduce(&x, &res, 1, mpi_type::get(), op, c.get()), "MPI_Allreduce"); + } else { + check_mpi_call(MPI_Reduce(&x, &res, 1, mpi_type::get(), op, root, c.get()), "MPI_Reduce"); + } + return res; } /** - * @brief Implementation of an in-place MPI reduce for types that have a corresponding MPI datatype, i.e. for which - * a specialization of mpi::mpi_type has been defined. + * @brief Implementation of an MPI reduce that reduces directly into an existing output object for types that have a + * corresponding MPI datatype. + * + * @details If the addresses of the input and output objects are equal, the reduction is done in place. * - * @details It throws an exception in case a call to the MPI C library fails. + * If mpi::has_env is false or if the communicator size is < 2, it either does nothing (in place) or copies the input + * into the output object. Otherwise, it calls `MPI_Allreduce` or `MPI_Reduce` (with `MPI_IN_PLACE`). + * + * It throws an exception in case the call to the MPI C library fails and it is expected that either all or none of + * the receiving processes choose the in place option. * * @tparam T Type to be reduced. - * @param x Object to be reduced. + * @param x_in Object to be reduced. + * @param x_out Object to be reduced into. 
* @param c mpi::communicator. * @param root Rank of the root process. * @param all Should all processes receive the result of the reduction. @@ -262,33 +378,91 @@ namespace mpi { */ template requires(has_mpi_type) - void mpi_reduce_in_place(T &x, communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) { - if (!all) - check_mpi_call(MPI_Reduce((c.rank() == root ? MPI_IN_PLACE : &x), &x, 1, mpi_type::get(), op, root, c.get()), "MPI_Reduce"); - else - check_mpi_call(MPI_Allreduce(MPI_IN_PLACE, &x, 1, mpi_type::get(), op, c.get()), "MPI_Allreduce"); + void mpi_reduce_into(T const &x_in, T &x_out, communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) { + // check if the reduction is in place + auto in_ptr = static_cast(&x_in); + auto out_ptr = static_cast(&x_out); + bool const in_place = (in_ptr == out_ptr); + if (all) { + EXPECTS_WITH_MESSAGE(all_equal(static_cast(in_place), c), + "Either zero or all receiving processes have to choose the in place option in mpi_reduce_into"); + } + + // in case there is no active MPI environment or if the communicator size is < 2, do nothing (in place) or copy + if (!has_env || c.size() < 2) { + if (!in_place) x_out = x_in; + return; + } + + // make the MPI C library call + if (in_place && (c.rank() == root || all)) in_ptr = MPI_IN_PLACE; + if (all) { + check_mpi_call(MPI_Allreduce(in_ptr, out_ptr, 1, mpi_type::get(), op, c.get()), "MPI_Allreduce"); + } else { + check_mpi_call(MPI_Reduce(in_ptr, out_ptr, 1, mpi_type::get(), op, root, c.get()), "MPI_Reduce"); + } } /** - * @brief Checks if a given object is equal across all ranks in the given communicator. + * @brief Implementation of an MPI gather for types that have a corresponding MPI datatype. * - * @details It requires that there is a specialized `mpi_reduce` for the given type `T` and that it is equality - * comparable as well as default constructible. 
+ * @details It constructs an output vector, resizes it on receiving ranks to the size of the communicator and calls + * mpi::mpi_gather_into. On non-receiving ranks the output vector is empty. * - * It makes two calls to mpi::all_reduce, one with `MPI_MIN` and the other with `MPI_MAX`, and compares their results. + * @tparam T Type to be gathered. + * @param x Object to be gathered. + * @param c mpi::communicator. + * @param root Rank of the root process. + * @param all Should all processes receive the result of the gather. + * @return `std::vector` containing the gathered objects. + */ + template + requires(has_mpi_type) + std::vector mpi_gather(T const &x, communicator c = {}, int root = 0, bool all = false) { + std::vector res(c.rank() == root || all ? c.size() : 0); + mpi_gather_into(x, res, c, root, all); + return res; + } + + /** + * @brief Implementation of an MPI gather that gathers directly into an existing output range for types that have a + * corresponding MPI datatype. * - * @note `MPI_MIN` and `MPI_MAX` need to make sense for the given type `T`. + * @details If mpi::has_env is false or if the communicator size is < 2, it copies the input object into the range. + * Otherwise, it calls `MPI_Allgather` or `MPI_Gather. * - * @tparam T Type to be checked. - * @param x Object to be equality compared. + * It throws an exception in case a call to the MPI C library fails and it expects that the range size on receiving + * processes is equal the communicator size. + * + * @tparam T Type to be gathered. + * @tparam R MPICompatibleRange type to be gathered into. + * @param x Object to be gathered. + * @param rg Range to be gathered into. * @param c mpi::communicator. - * @return If the given object is equal on all ranks, it returns true. Otherwise, it returns false. + * @param root Rank of the root process. + * @param all Should all processes receive the result of the gather. 
*/ - template bool all_equal(T const &x, communicator c = {}) { - if (!has_env) return true; - auto min_obj = all_reduce(x, c, MPI_MIN); - auto max_obj = all_reduce(x, c, MPI_MAX); - return min_obj == max_obj; + template + requires(has_mpi_type && std::same_as>>) + void mpi_gather_into(T const &x, R &&rg, communicator c = {}, int root = 0, bool all = false) { // NOLINT (ranges need not be forwarded) + // check the size of the output range + if (c.rank() == root || all) { + EXPECTS_WITH_MESSAGE(c.size() == std::ranges::size(rg), "Output range size is not equal the number of ranks in mpi_gather_into"); + } + + // in case there is no active MPI environment or if the communicator size is < 2, copy the input into the range + if (!has_env || c.size() < 2) { + std::ranges::copy(std::views::single(x), std::ranges::begin(rg)); + return; + } + + // make the MPI C library call + using value_t = std::ranges::range_value_t; + if (all) { + check_mpi_call(MPI_Allgather(&x, 1, mpi_type::get(), std::ranges::data(rg), 1, mpi_type::get(), c.get()), "MPI_Allgather"); + } else { + check_mpi_call(MPI_Gather(&x, 1, mpi_type::get(), std::ranges::data(rg), 1, mpi_type::get(), root, c.get()), "MPI_Gather"); + } } /** @} */ diff --git a/c++/mpi/lazy.hpp b/c++/mpi/lazy.hpp deleted file mode 100644 index e142ca56..00000000 --- a/c++/mpi/lazy.hpp +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (c) 2024 Simons Foundation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0.txt -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -// Authors: Thomas Hahn, Alexander Hampel, Olivier Parcollet, Nils Wentzell - -/** - * @file - * @brief Provides a struct and tags to represent lazy MPI communication. - */ - -#pragma once - -#include "./communicator.hpp" - -#include - -namespace mpi { - - namespace tag { - - /** - * @ingroup mpi_lazy - * @brief Tag to specify a lazy MPI reduce call. - */ - struct reduce {}; - - /** - * @ingroup mpi_lazy - * @brief Tag to specify a lazy MPI scatter call. - */ - struct scatter {}; - - /** - * @ingroup mpi_lazy - * @brief Tag to specify a lazy MPI gather call. - */ - struct gather {}; - - } // namespace tag - - /** - * @addtogroup mpi_lazy - * @{ - */ - - /** - * @brief Represents a lazy MPI communication. - * - * @tparam Tag An mpi::tag to specify the kind of MPI communication. - * @tparam T Type to be communicated. - */ - template struct lazy { - /// Object to be communicated. - T rhs; - - /// mpi::communicator used in the lazy communication. - communicator c; - - /// Rank of the root process. - int root{}; - - /// Whether to use the `MPI_Allxxx` operation - bool all{}; - - /// `MPI_Op` used in the lazy communication (only relevant if mpi::tag::reduce is used). - MPI_Op op{}; - }; - - /** - * @brief Type trait to check if a type is mpi::lazy. - * @tparam T Type to be checked. - */ - template inline constexpr bool is_mpi_lazy = false; - - /** - * @brief Spezialization of mpi::is_mpi_lazy. - * - * @tparam Tag Type to specify the kind of MPI call. - * @tparam T Type to be checked. - */ - template inline constexpr bool is_mpi_lazy> = true; - - /** @} */ - -} // namespace mpi diff --git a/c++/mpi/macros.hpp b/c++/mpi/macros.hpp index 1dec58d4..cd566580 100644 --- a/c++/mpi/macros.hpp +++ b/c++/mpi/macros.hpp @@ -87,12 +87,12 @@ #ifdef NDEBUG -#define EXPECTS(X) -#define ASSERT(X) -#define ENSURES(X) -#define EXPECTS_WITH_MESSAGE(X, ...) -#define ASSERT_WITH_MESSAGE(X, ...) -#define ENSURES_WITH_MESSAGE(X, ...) 
+#define EXPECTS(X) {} +#define ASSERT(X) {} +#define ENSURES(X) {} +#define EXPECTS_WITH_MESSAGE(X, ...) {} +#define ASSERT_WITH_MESSAGE(X, ...) {} +#define ENSURES_WITH_MESSAGE(X, ...) {} #else diff --git a/c++/mpi/mpi.hpp b/c++/mpi/mpi.hpp index 592214c3..46ac1be6 100644 --- a/c++/mpi/mpi.hpp +++ b/c++/mpi/mpi.hpp @@ -27,7 +27,6 @@ #include "./datatypes.hpp" #include "./environment.hpp" #include "./generic_communication.hpp" -#include "./lazy.hpp" #include "./monitor.hpp" #include "./operators.hpp" #include "./pair.hpp" diff --git a/c++/mpi/pair.hpp b/c++/mpi/pair.hpp index e8900ea8..115d6ed1 100644 --- a/c++/mpi/pair.hpp +++ b/c++/mpi/pair.hpp @@ -16,7 +16,7 @@ /** * @file - * @brief Provides an MPI broadcast and reduce for std::pair. + * @brief Provides an MPI broadcast and reduce for `std::pair`. */ #pragma once @@ -35,13 +35,13 @@ namespace mpi { */ /** - * @brief Implementation of an MPI broadcast for a std::pair. + * @brief Implementation of an MPI broadcast for a `std::pair`. * - * @details Simply calls the generic mpi::broadcast for the first and second element of the pair. + * @details It calls the generic mpi::broadcast for the first and second element of the pair. * * @tparam T1 Type of the first element of the pair. * @tparam T2 Type of the second element of the pair. - * @param p std::pair to broadcast. + * @param p `std::pair` to broadcast. * @param c mpi::communicator. * @param root Rank of the root process. */ @@ -51,18 +51,18 @@ namespace mpi { } /** - * @brief Implementation of an MPI reduce for a std::pair. + * @brief Implementation of an MPI reduce for a `std::pair`. * - * @details Simply calls the generic mpi::reduce for the first and second element of the pair. + * @details It calls the generic mpi::reduce for the first and second element of the pair separately. * * @tparam T1 Type of the first element of the pair. * @tparam T2 Type of the second element of the pair. - * @param p std::pair to be reduced. 
+ * @param p `std::pair` to be reduced. * @param c mpi::communicator. * @param root Rank of the root process. * @param all Should all processes receive the result of the reduction. * @param op `MPI_Op` used in the reduction. - * @return std::pair containing the result of each individual reduction. + * @return `std::pair` containing the results of the two reductions. */ template auto mpi_reduce(std::pair const &p, communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) { diff --git a/c++/mpi/ranges.hpp b/c++/mpi/ranges.hpp index 5ca79124..63ff0d11 100644 --- a/c++/mpi/ranges.hpp +++ b/c++/mpi/ranges.hpp @@ -16,7 +16,7 @@ /** * @file - * @brief Provides an MPI broadcast, reduce, scatter and gather for contiguous ranges. + * @brief Provides an MPI broadcast, reduce, scatter and gather for generic ranges. */ #pragma once @@ -33,8 +33,13 @@ #include #include +#include +#include +#include #include #include +#include +#include #include namespace mpi { @@ -45,425 +50,262 @@ namespace mpi { */ /** - * @brief Implementation of an MPI broadcast for an mpi::contiguous_sized_range object. + * @brief Implementation of an MPI broadcast for `std::ranges::sized_range` objects. * - * @details If mpi::has_mpi_type is true for the value type of the range, then the range is broadcasted using a simple - * `MPI_Bcast`. Otherwise, the generic mpi::broadcast is called for each element of the range. + * @details The behaviour of this function is as follows: + * - If the number of elements to be broadcasted is zero, it does nothing. + * - If the range is contiguous with an MPI compatible value type, it calls `MPI_Bcast` and broadcasts the elements + * from the input range on the root process to all other processes. + * - Otherwise, it calls mpi::broadcast for each element separately. * - * It throws an exception in case a call to the MPI C library fails and it expects that the sizes of the ranges are - * equal across all processes. 
+ * It throws an exception in case a call to the MPI C library fails and it expects that the input range size is equal + * on all processes. * - * If the ranges are empty or if mpi::has_env is false or if the communicator size is < 2, it does nothing. - * - * @note It is recommended to use the generic mpi::broadcast for supported types, e.g. `std::vector`, `std::array` or - * `std::string`. It is the user's responsibility to ensure that ranges have the correct sizes. - * - * @code{.cpp} - * // create a vector on all ranks - * auto vec = std::vector(5); - * - * if (comm.rank() == 0) { - * // on rank 0, initialize the vector and broadcast the first 3 elements - * vec = {1, 2, 3, 0, 0}; - * mpi::broadcast_range(std::span{vec.data(), 3}, comm); - * } else { - * // on other ranks, broadcast to the last 3 elements of the vector - * mpi::broadcast_range(std::span{vec.data() + 2, 3}, comm); - * } - * - * // output result - * for (auto x : vec) std::cout << x << " "; - * std::cout << std::endl; - * @endcode - * - * Output (with 4 processes): - * - * ``` - * 1 2 3 0 0 - * 0 0 1 2 3 - * 0 0 1 2 3 - * 0 0 1 2 3 - * ``` - * - * @tparam R mpi::contiguous_sized_range type. - * @param rg Range to broadcast. + * @tparam R `std::ranges::sized_range` type. + * @param rg Range to be broadcasted (into). * @param c mpi::communicator. * @param root Rank of the root process. 
*/ - template void broadcast_range(R &&rg, communicator c = {}, int root = 0) { // NOLINT (ranges need not be forwarded) - // check the sizes of all ranges - using value_t = std::ranges::range_value_t; - auto const size = std::ranges::size(rg); - EXPECTS_WITH_MESSAGE(all_equal(size, c), "Range sizes are not equal across all processes in mpi::broadcast_range"); - - // do nothing if the range is empty, if MPI is not initialized or if the communicator size is < 2 - if (size == 0 || !has_env || c.size() < 2) return; - - // broadcast the range - if constexpr (has_mpi_type) - // make an MPI C library call for MPI compatible value types - check_mpi_call(MPI_Bcast(std::ranges::data(rg), size, mpi_type::get(), root, c.get()), "MPI_Bcast"); - else - // otherwise call the specialized mpi_broadcast for each element - for (auto &val : rg) broadcast(val, c, root); - } + template void broadcast_range(R &&rg, communicator c = {}, int root = 0) { // NOLINT (ranges need not be forwarded) + // check the size of the range + auto size = static_cast(std::ranges::size(rg)); + EXPECTS_WITH_MESSAGE(all_equal(size, c), "Range sizes are not equal on all processes in mpi::broadcast_range"); - /** - * @brief Implementation of an in-place MPI reduce for an mpi::contiguous_sized_range object. - * - * @details If mpi::has_mpi_type is true for the value type of the range, then the range is reduced using a simple - * `MPI_Reduce` or `MPI_Allreduce` with `MPI_IN_PLACE`. Otherwise, the specialized `mpi_reduce_in_place` is called - * for each element in the range. - * - * It throws an exception in case a call to the MPI C library fails and it expects that the sizes of the ranges are - * equal across all processes. - * - * If the ranges are empty or if mpi::has_env is false or if the communicator size is < 2, it does nothing. - * - * @note It is recommended to use the generic mpi::reduce_in_place and mpi::all_reduce_in_place for supported types, - * e.g. `std::vector` or `std::array`. 
It is the user's responsibility to ensure that ranges have the correct sizes. - * - * @code{.cpp} - * // create a vector on all ranks - * auto vec = std::vector{0, 1, 2, 3, 4}; - * - * // in-place reduce the middle elements only on rank 0 - * mpi::reduce_in_place_range(std::span{vec.data() + 1, 3}, comm); - * - * // output result - * for (auto x : vec) std::cout << x << " "; - * std::cout << std::endl; - * @endcode - * - * Output (with 4 processes): - * - * ``` - * 0 1 2 3 4 - * 0 1 2 3 4 - * 0 1 2 3 4 - * 0 4 8 12 4 - * ``` - * - * @tparam R mpi::contiguous_sized_range type. - * @param rg Range to reduce. - * @param c mpi::communicator. - * @param root Rank of the root process. - * @param all Should all processes receive the result of the reduction. - * @param op `MPI_Op` used in the reduction. - */ - template - void reduce_in_place_range(R &&rg, communicator c = {}, int root = 0, bool all = false, // NOLINT (ranges need not be forwarded) - MPI_Op op = MPI_SUM) { - // check the sizes of all ranges - using value_t = std::ranges::range_value_t; - auto const size = std::ranges::size(rg); - EXPECTS_WITH_MESSAGE(all_equal(size, c), "Range sizes are not equal across all processes in mpi::reduce_in_place_range"); - - // do nothing if the range is empty, if MPI is not initialized or if the communicator size is < 2 - if (size == 0 || !has_env || c.size() < 2) return; - - // reduce the ranges - if constexpr (has_mpi_type) { - // make an MPI C library call for MPI compatible value types - auto data = std::ranges::data(rg); - if (!all) - check_mpi_call(MPI_Reduce((c.rank() == root ? 
MPI_IN_PLACE : data), data, size, mpi_type::get(), op, root, c.get()), "MPI_Reduce"); - else - check_mpi_call(MPI_Allreduce(MPI_IN_PLACE, data, size, mpi_type::get(), op, c.get()), "MPI_Allreduce"); + // do nothing if no elements are broadcasted + if (size <= 0) return; + + // call the MPI C library if the ranges are contiguous with MPI compatible value types, otherwise do element-wise + // broadcasts + if constexpr (MPICompatibleRange) { + // in case there is no active MPI environment or if the communicator size is < 2, do nothing + if (!has_env || c.size() < 2) return; + + // make the MPI C library call (allow the number of elements to larger than INT_MAX) + constexpr long max_int = std::numeric_limits::max(); + for (long offset = 0; size > 0; offset += max_int, size -= max_int) { + auto const count = static_cast(std::min(size, max_int)); + check_mpi_call(MPI_Bcast(std::ranges::data(rg) + offset, count, mpi_type>::get(), root, c.get()), "MPI_Bcast"); + } } else { - // otherwise call the specialized mpi_reduce_in_place for each element - for (auto &val : rg) mpi_reduce_in_place(val, c, root, all, op); + // otherwise call the generic broadcast for each element separately + for (auto &x : rg) broadcast(x, c, root); } } /** - * @brief Implementation of an MPI reduce for an mpi::contiguous_sized_range. - * - * @details If mpi::has_mpi_type is true for the value type of the range, then the range is reduced using a simple - * `MPI_Reduce` or `MPI_Allreduce`. Otherwise, the specialized `mpi_reduce` is called for each element in the range. - * - * It throws an exception in case a call to the MPI C library fails and it expects that the sizes of the input ranges - * are equal across all processes and that they are equal to the size of the output range on receiving processes. - * - * If the input ranges are empty, it does nothing. If mpi::has_env is false or if the communicator size is < 2, it - * simply copies the input range to the output range. 
- * - * @note It is recommended to use the generic mpi::reduce and mpi::all_reduce for supported types, e.g. `std::vector` - * or `std::array`. It is the user's responsibility to ensure that ranges have the correct sizes. - * - * @code{.cpp} - * // create input and output vectors on all ranks - * auto in_vec = std::vector{0, 1, 2, 3, 4}; - * auto out_vec = std::vector(in_vec.size(), 0); - * - * // allreduce the middle elements of the input vector to the last elements of the output vector - * mpi::reduce_range(std::span{in_vec.data() + 1, 3}, std::span{out_vec.data() + 2, 3}, comm, 0, true); - * - * // output result - * for (auto x : out_vec) std::cout << x << " "; - * std::cout << std::endl; - * @endcode - * - * Output (with 4 processes): - * - * ``` - * 0 0 4 8 12 - * 0 0 4 8 12 - * 0 0 4 8 12 - * 0 0 4 8 12 - * ``` - * - * @tparam R1 mpi::contiguous_sized_range type. - * @tparam R2 mpi::contiguous_sized_range type. - * @param in_rg Range to reduce. - * @param out_rg Range to reduce into. + * @brief Implementation of an MPI reduce for `std::ranges::sized_range` objects. + * + * @details The behaviour of this function is as follows: + * - If the number of elements to be reduced is zero, it does nothing. + * - If the range is contiguous with an MPI compatible value type, it calls `MPI_Reduce` or `MPI_Allreduce` to reduce + * the elements in the input ranges into the output ranges on receiving ranks. + * - If the input and output ranges point to the same data, the reduction is done in place. + * - Otherwise, it calls mpi::reduce_into for each input-output element pair separately. + * + * It throws an exception in case a call to the MPI C library fails and it expects + * - that the input range size on all processes and the output range size on receiving processes are equal and + * - that either all or none of the receiving processes choose the in place option. + * + * @tparam R1 `std::ranges::sized_range` type. + * @tparam R2 `std::ranges::sized_range` type. 
+ * @param in_rg Range to be reduced. + * @param out_rg Range to be reduced into. * @param c mpi::communicator. * @param root Rank of the root process. * @param all Should all processes receive the result of the reduction. * @param op `MPI_Op` used in the reduction. */ - template + template void reduce_range(R1 &&in_rg, R2 &&out_rg, communicator c = {}, int root = 0, bool all = false, // NOLINT (ranges need not be forwarded) MPI_Op op = MPI_SUM) { - // check input and output ranges - auto const in_size = std::ranges::size(in_rg); - EXPECTS_WITH_MESSAGE(all_equal(in_size, c), "Input range sizes are not equal across all processes in mpi::reduce_range"); - if (c.rank() == root || all) { - EXPECTS_WITH_MESSAGE(in_size == std::ranges::size(out_rg), "Input and output range sizes are not equal in mpi::reduce_range"); - } + // check the size of the input range + auto size = static_cast(std::ranges::size(in_rg)); + EXPECTS_WITH_MESSAGE(all_equal(size, c), "Input range sizes are not equal on all processes in mpi::reduce_range"); - // do nothing if the input range is empty - if (in_size == 0) return; + // do nothing if no elements are reduced + if (size <= 0) return; - // simply copy if there is no active MPI environment or if the communicator size is < 2 - if (!has_env || c.size() < 2) { - std::ranges::copy(std::forward(in_rg), std::ranges::data(out_rg)); - return; - } + // check the size of the output range + bool const receives = (c.rank() == root || all); + if (receives) EXPECTS_WITH_MESSAGE(size == std::ranges::size(out_rg), "Input and output range sizes are not equal in mpi::reduce_range"); + + // call the MPI C library if the ranges are contiguous with MPI compatible value types + if constexpr (MPICompatibleRange && MPICompatibleRange) { + static_assert(std::same_as>, std::remove_cvref_t>>, + "Value types of input and output ranges not compatible in mpi::reduce_range"); - // reduce the ranges - using in_value_t = std::ranges::range_value_t; - using out_value_t = 
std::ranges::range_value_t; - if constexpr (has_mpi_type && std::same_as) { - // make an MPI C library call for MPI compatible value types - auto const in_data = std::ranges::data(in_rg); - auto out_data = std::ranges::data(out_rg); - if (!all) - check_mpi_call(MPI_Reduce(in_data, out_data, in_size, mpi_type::get(), op, root, c.get()), "MPI_Reduce"); - else - check_mpi_call(MPI_Allreduce(in_data, out_data, in_size, mpi_type::get(), op, c.get()), "MPI_Allreduce"); + // check if the reduction is in place + bool const in_place = (static_cast(std::ranges::data(in_rg)) == static_cast(std::ranges::data(out_rg))); + if (all) { + EXPECTS_WITH_MESSAGE(all_equal(static_cast(in_place), c), + "Either zero or all receiving processes have to choose the in place option in mpi::reduce_range"); + } + + // in case there is no active MPI environment or if the communicator size is < 2, copy to the output range + if (!has_env || c.size() < 2) { + std::ranges::copy(std::forward(in_rg), std::ranges::data(out_rg)); + return; + } + + // make the MPI C library call (allow the number of elements to larger than INT_MAX) + constexpr long max_int = std::numeric_limits::max(); + for (long offset = 0; size > 0; offset += max_int, size -= max_int) { + auto in_data = static_cast(std::ranges::data(in_rg) + offset); + auto out_data = std::ranges::data(out_rg) + offset; + if (receives and in_place) in_data = MPI_IN_PLACE; + auto const count = static_cast(std::min(size, max_int)); + if (all) { + check_mpi_call(MPI_Allreduce(in_data, out_data, count, mpi_type>::get(), op, c.get()), "MPI_Allreduce"); + } else { + check_mpi_call(MPI_Reduce(in_data, out_data, count, mpi_type>::get(), op, root, c.get()), "MPI_Reduce"); + } + } } else { - // otherwise call the specialized mpi_reduce for each element - // the size of the output range is arbitrary on non-recieving ranks, so we cannot use transform on them - if (c.rank() == root || all) - std::ranges::transform(std::forward(in_rg), std::ranges::data(out_rg), 
[&](auto const &val) { return reduce(val, c, root, all, op); }); - else - // the assignment is needed in case a lazy object is returned - std::ranges::for_each(std::forward(in_rg), [&](auto const &val) { [[maybe_unused]] out_value_t ignore = reduce(val, c, root, all, op); }); + // fallback to element-wise reduction if the range is not contiguous with an MPI compatible value type + if (size <= std::ranges::size(out_rg)) { + // on ranks where the output range size is large enough, reduce into the output elements + for (auto &&[x_in, x_out] : itertools::zip(in_rg, out_rg)) reduce_into(x_in, x_out, c, root, all, op); + } else { + // on all other ranks, reduce into a dummy output object (needs to be default constructible) + using out_value_t = std::ranges::range_value_t; + if constexpr (std::is_default_constructible_v) { + out_value_t out_dummy{}; + for (auto &&x_in : in_rg) reduce_into(x_in, out_dummy, c, root, all, op); + } else { + // if it is not default constructible, is there something we can do? + throw std::runtime_error("Cannot default construct dummy object in mpi::reduce_range"); + } + } } } /** - * @brief Implementation of an MPI scatter for an mpi::contiguous_sized_range. - * - * @details If mpi::has_mpi_type is true for the value type of the range, then the range is scattered as evenly as - * possible across the processes in the communicator using a simple `MPI_Scatterv`. Otherwise an exception is thrown. - * - * The user can specify a chunk size which is used to divide the input range into chunks of the specified size. The - * number of chunks are then distributed evenly across the processes in the communicator. The size of the input range - * is required to be a multiple of the given chunk size, otherwise an exception is thrown. - * - * It throws an exception in case a call to the MPI C library fails and it expects that the output ranges have the - * correct size and that they add up to the size of the input range on the root process. 
- * - * If the input range is empty on root, it does nothing. If mpi::has_env is false or if the communicator size is < 2, - * it simply copies the input range to the output range. - * - * @note It is recommended to use the generic mpi::scatter for supported types, e.g. `std::vector`. It is the user's - * responsibility to ensure that the ranges have the correct sizes (mpi::chunk_length can be useful to do that). - * - * @code{.cpp} - * // create input and output vectors on all ranks - * auto in_vec = std::vector{}; - * if (comm.rank() == 0) in_vec = {0, 1, 2, 3, 4, 5, 6, 7}; - * auto out_vec = std::vector(mpi::chunk_length(5, comm.size(), comm.rank()), 0); - * - * // scatter the middle elements of the input vector from rank 0 to all ranks - * mpi::scatter_range(std::span{in_vec.data() + 1, 5}, out_vec, 5, comm); - * - * // output result - * for (auto x : out_vec) std::cout << x << " "; - * std::cout << std::endl; - * @endcode - * - * Output (with 2 processes): - * - * ``` - * 4 5 - * 1 2 3 - * ``` - * - * @tparam R1 mpi::contiguous_sized_range type. - * @tparam R2 mpi::contiguous_sized_range type. - * @param in_rg Range to scatter. - * @param out_rg Range to scatter into. - * @param in_size Size of the input range on root (must also be given on non-root ranks). + * @brief Implementation of an MPI scatter for mpi::MPICompatibleRange objects. + * + * @details The behaviour of this function is as follows: + * - If the number of elements to be scattered is zero, it does nothing. + * - Otherwise, it calls `MPI_Scatterv` to scatter the input range from the root process to the output ranges on all + * other processes. + * + * By default, the input range is scattered as evenly as possible from the root process to all other processes in the + * communicator. To change that, the user can specify a chunk size which is used to divide the number of elements to + * be scattered into chunks of the specified size. 
Then, instead of single elements, the chunks are distributed evenly + * across the processes in the communicator. + * + * It throws an exception if a call to the MPI C library fails and it expects + * - that the number of elements to be scattered is equal on all processes, + * - that the size of the input range on the root process is equal to the number of elements to be scattered and + * - that the output range size is equal to the number of elements to be received on all processes. + * + * @note In place scattering is not supported. + * + * @tparam R1 mpi::MPICompatibleRange type. + * @tparam R2 mpi::MPICompatibleRange type. + * @param in_rg Range to be scattered. + * @param out_rg Range to be scattered into. + * @param scatter_size Number of elements to be scattered. + * @param c mpi::communicator. + * @param root Rank of the root process. + * @param chunk_size Size of the chunks to scatter. */ - template - requires(std::same_as, std::ranges::range_value_t>) - void scatter_range(R1 &&in_rg, R2 &&out_rg, long in_size, communicator c = {}, int root = 0, // NOLINT (ranges need not be forwarded) + template + requires(std::same_as>, std::remove_cvref_t>>) + void scatter_range(R1 &&in_rg, R2 &&out_rg, long scatter_size, communicator c = {}, int root = 0, // NOLINT (ranges need not be forwarded) long chunk_size = 1) { - // check the sizes of the input and output ranges + // check the number of elements to be scattered + EXPECTS_WITH_MESSAGE(all_equal(scatter_size, c), "Number of elements to be scattered is not equal on all processes in mpi::scatter_range"); + + // do nothing if no elements are scattered + if (scatter_size == 0) return; + + // check the size of the input range on root if (c.rank() == root) { - EXPECTS_WITH_MESSAGE(in_size == std::ranges::size(in_rg), "Input range size not equal to provided size in mpi::scatter_range"); + EXPECTS_WITH_MESSAGE(scatter_size == std::ranges::size(in_rg), + "Input range size on root is not equal the number of elements to be scattered in 
mpi::scatter_range"); } - EXPECTS_WITH_MESSAGE(in_size == all_reduce(std::ranges::size(out_rg), c), - "Output range sizes don't add up to input range size in mpi::scatter_range"); - // do nothing if the input range is empty - if (in_size == 0) return; + // check the size of the output range + auto const recvcount = static_cast(chunk_length(scatter_size, c.size(), c.rank(), chunk_size)); + EXPECTS_WITH_MESSAGE(recvcount == std::ranges::size(out_rg), + "Output range size is not equal the number of elements to be received in mpi::scatter_range"); - // simply copy if there is no active MPI environment or if the communicator size is < 2 + // in case there is no active MPI environment or if the communicator size is < 2, copy to output range if (!has_env || c.size() < 2) { std::ranges::copy(std::forward(in_rg), std::ranges::data(out_rg)); return; } - // check the size of the output range - int recvcount = static_cast(chunk_length(in_size, c.size(), c.rank(), chunk_size)); - EXPECTS_WITH_MESSAGE(recvcount == std::ranges::size(out_rg), "Output range size is incorrect in mpi::scatter_range"); - // prepare arguments for the MPI call auto sendcounts = std::vector(c.size()); auto displs = std::vector(c.size() + 1, 0); for (int i = 0; i < c.size(); ++i) { - sendcounts[i] = static_cast(chunk_length(in_size, c.size(), i, chunk_size)); + sendcounts[i] = static_cast(chunk_length(scatter_size, c.size(), i, chunk_size)); displs[i + 1] = sendcounts[i] + displs[i]; } - // scatter the range - using in_value_t = std::ranges::range_value_t; - using out_value_t = std::ranges::range_value_t; - if constexpr (has_mpi_type && has_mpi_type) { - // make an MPI C library call for MPI compatible value types - auto const in_data = std::ranges::data(in_rg); - auto out_data = std::ranges::data(out_rg); - check_mpi_call(MPI_Scatterv(in_data, sendcounts.data(), displs.data(), mpi_type::get(), out_data, recvcount, - mpi_type::get(), root, c.get()), - "MPI_Scatterv"); - } else { - // otherwise throw an 
exception - throw std::runtime_error{"Error in mpi::scatter_range: Types with no corresponding datatype can only be all-gathered"}; - } + // make the MPI C library call + check_mpi_call(MPI_Scatterv(std::ranges::data(in_rg), sendcounts.data(), displs.data(), mpi_type>::get(), + std::ranges::data(out_rg), recvcount, mpi_type>::get(), root, c.get()), + "MPI_Scatterv"); } /** - * @brief Implementation of an MPI gather for an mpi::contiguous_sized_range. - * - * @details If mpi::has_mpi_type is true for the value type of the input ranges, then the ranges are gathered using a - * simple `MPI_Gatherv` or `MPI_Allgatherv`. Otherwise, each process broadcasts its elements to all other processes - * which implies that `all == true` is required in this case. - * - * It throws an exception in case a call to the MPI C library fails and it expects that the sizes of the input ranges - * add up to the given size of the output range and that the output ranges have the correct size on receiving - * processes. - * - * If the input ranges are all empty, it does nothing. If mpi::has_env is false or if the communicator size is < 2, it - * simply copies the input range to the output range. - * - * @note It is recommended to use the generic mpi::gather for supported types, e.g. `std::vector` and `std::string`. - * It is the user's responsibility to ensure that the ranges have the correct sizes. - * - * @code{.cpp} - * // create input and output vectors on all ranks - * auto in_vec = std::vector{0, 1, 2, 3, 4}; - * auto out_vec = std::vector(3 * comm.size(), 0); + * @brief Implementation of an MPI gather for mpi::MPICompatibleRange objects. * - * // gather the middle elements of the input vectors from all ranks on rank 0 - * mpi::gather_range(std::span{in_vec.data() + 1, 3}, out_vec, 3 * comm.size(), comm); + * @details The behaviour of this function is as follows: + * - If the number of elements to be gathered is zero, it does nothing. 
+ * - Otherwise, it calls `MPI_Gatherv` or `MPI_Allgatherv` to gather the elements from the input ranges on all + * processes into the output ranges on receiving processes. * - * // output result - * for (auto x : out_vec) std::cout << x << " "; - * std::cout << std::endl; - * @endcode + * This is the inverse operation of mpi::scatter_range. The numbers of elements to be gathered do not have to be equal + * on all processes. * - * Output (with 2 processes): + * It throws an exception in case a call to the MPI C library fails and it expects that the output range sizes on + * receiving processes is the number of elements to be gathered. * - * ``` - * 0 0 0 0 0 0 0 0 0 0 0 0 - * 0 0 0 0 0 0 0 0 0 0 0 0 - * 0 0 0 0 0 0 0 0 0 0 0 0 - * 1 2 3 1 2 3 1 2 3 1 2 3 - * ``` + * @note In place gathering is not supported. * - * @tparam R1 mpi::contiguous_sized_range type. - * @tparam R2 mpi::contiguous_sized_range type. - * @param in_rg Range to gather. - * @param out_rg Range to gather into. - * @param out_size Size of the output range on receiving processes (must also be given on non-receiving ranks). + * @tparam R1 mpi::MPICompatibleRange type. + * @tparam R2 mpi::MPICompatibleRange type. + * @param in_rg Range to be gathered. + * @param out_rg Range to be gathered into. * @param c mpi::communicator. * @param root Rank of the root process. - * @param all Should all processes receive the result of the reduction. + * @param all Should all processes receive the result of the gather operation. 
*/ - template - void gather_range(R1 &&in_rg, R2 &&out_rg, long out_size, communicator c = {}, int root = 0, // NOLINT (ranges need not be forwarded) - bool all = false) { - // check the sizes of the input and output ranges - auto const in_size = std::ranges::size(in_rg); - EXPECTS_WITH_MESSAGE(out_size = all_reduce(in_size, c), "Input range sizes don't add up to output range size in mpi::gather_range"); + template + requires(std::same_as>, std::remove_cvref_t>>) + void gather_range(R1 &&in_rg, R2 &&out_rg, communicator c = {}, int root = 0, bool all = false) { // NOLINT (ranges need not be forwarded) + // get the receive counts (sendcount from each process) and the displacements + auto sendcount = static_cast(std::ranges::size(in_rg)); + auto recvcounts = all_gather(sendcount, c); + auto displs = std::vector(c.size() + 1, 0); + std::partial_sum(recvcounts.begin(), recvcounts.end(), displs.begin() + 1); + + // do nothing if there are no elements to gather + if (displs.back() == 0) return; + + // check the size of the output range on receiving ranks if (c.rank() == root || all) { - EXPECTS_WITH_MESSAGE(out_size == std::ranges::size(out_rg), "Output range size is incorrect in mpi::gather_range"); + EXPECTS_WITH_MESSAGE(displs.back() == std::ranges::size(out_rg), + "Output range size is not equal the number of elements to be received in mpi::gather_range"); } - // do nothing if the output range is empty - if (out_size == 0) return; - - // simply copy if there is no active MPI environment or if the communicator size is < 2 + // in case there is no active MPI environment or if the communicator size is < 2, copy to the output range if (!has_env || c.size() < 2) { std::ranges::copy(std::forward(in_rg), std::ranges::data(out_rg)); return; } - // prepare arguments for the MPI call - auto recvcounts = std::vector(c.size()); - auto displs = std::vector(c.size() + 1, 0); - int sendcount = in_size; - if (!all) - check_mpi_call(MPI_Gather(&sendcount, 1, mpi_type::get(), 
recvcounts.data(), 1, mpi_type::get(), root, c.get()), "MPI_Gather"); - else - check_mpi_call(MPI_Allgather(&sendcount, 1, mpi_type::get(), recvcounts.data(), 1, mpi_type::get(), c.get()), "MPI_Allgather"); - for (int i = 0; i < c.size(); ++i) displs[i + 1] = recvcounts[i] + displs[i]; - - // gather the ranges - using in_value_t = std::ranges::range_value_t; - using out_value_t = std::ranges::range_value_t; - if constexpr (has_mpi_type && has_mpi_type) { - // make an MPI C library call for MPI compatible value types - auto const in_data = std::ranges::data(in_rg); - auto out_data = std::ranges::data(out_rg); - if (!all) - check_mpi_call(MPI_Gatherv(in_data, sendcount, mpi_type::get(), out_data, recvcounts.data(), displs.data(), - mpi_type::get(), root, c.get()), - "MPI_Gatherv"); - else - check_mpi_call(MPI_Allgatherv(in_data, sendcount, mpi_type::get(), out_data, recvcounts.data(), displs.data(), - mpi_type::get(), c.get()), - "MPI_Allgatherv"); + // make the MPI C library call + if (all) { + check_mpi_call(MPI_Allgatherv(std::ranges::data(in_rg), sendcount, mpi_type>::get(), std::ranges::data(out_rg), + recvcounts.data(), displs.data(), mpi_type>::get(), c.get()), + "MPI_Allgatherv"); } else { - if (all) { - // if all == true, each process broadcasts it elements to all other ranks - for (int i = 0; i < c.size(); ++i) { - auto view = std::views::drop(out_rg, displs[i]) | std::views::take(displs[i + 1] - displs[i]); - if (c.rank() == i) std::ranges::copy(in_rg, std::ranges::begin(view)); - broadcast_range(view, c, i); - } - } else { - // otherwise throw an exception - throw std::runtime_error{"Error in mpi::gather_range: Types with no corresponding datatype can only be all-gathered"}; - } + check_mpi_call(MPI_Gatherv(std::ranges::data(in_rg), sendcount, mpi_type>::get(), std::ranges::data(out_rg), + recvcounts.data(), displs.data(), mpi_type>::get(), root, c.get()), + "MPI_Gatherv"); } } diff --git a/c++/mpi/string.hpp b/c++/mpi/string.hpp index f20c1474..d5ce188f 
100644 --- a/c++/mpi/string.hpp +++ b/c++/mpi/string.hpp @@ -16,7 +16,7 @@ /** * @file - * @brief Provides an MPI broadcast for std::string. + * @brief Provides an MPI broadcast and gather for `std::string`. */ #pragma once @@ -35,39 +35,39 @@ namespace mpi { */ /** - * @brief Implementation of an MPI broadcast for a std::string. + * @brief Implementation of an MPI broadcast for a `std::string`. * * @details It first broadcasts the size of the string from the root process to all other processes, then resizes the * string on all non-root processes and calls mpi::broadcast_range with the (resized) input string. * - * @param s std::string to broadcast. + * @param s `std::string` to broadcast (into). * @param c mpi::communicator. * @param root Rank of the root process. */ inline void mpi_broadcast(std::string &s, communicator c, int root) { - size_t len = s.size(); - broadcast(len, c, root); - if (c.rank() != root) s.resize(len); + auto count = s.size(); + broadcast(count, c, root); + if (c.rank() != root) s.resize(count); broadcast_range(s, c, root); } /** - * @brief Implementation of an MPI gather for a std::string. + * @brief Implementation of an MPI gather for a `std::string` that gathers directly into an existing output string. * - * @details It first all-reduces the sizes of the input string from all processes and then calls mpi::gather_range. + * @details It first all-reduces the sizes of the input strings from all processes. On receiving ranks, the output + * string is resized to the reduced size in case it has not the correct size. On non-receiving ranks, the output + * string is always unmodified. Then mpi::gather_range with the input and (resized) output strings is called. * - * @param s std::string to gather. + * @param s_in `std::string` to gather. + * @param s_out `std::string` to gather into. * @param c mpi::communicator. * @param root Rank of the root process. * @param all Should all processes receive the result. 
- * @return std::string containing the result of the gather operation. */ - inline std::string mpi_gather(std::string const &s, communicator c = {}, int root = 0, bool all = false) { - long len = static_cast(all_reduce(s.size(), c)); - std::string res{}; - if (c.rank() == root || all) res.resize(len); - gather_range(s, res, len, c, root, all); - return res; + inline void mpi_gather_into(std::string const &s_in, std::string &s_out, communicator c = {}, int root = 0, bool all = false) { + auto const gather_size = mpi::all_reduce(s_in.size(), c); + if ((c.rank() == root || all) && s_out.size() != s_in.size()) s_out.resize(gather_size); + gather_range(s_in, s_out, c, root, all); } /** @} */ diff --git a/c++/mpi/utils.hpp b/c++/mpi/utils.hpp index 9be3cbbf..597514e5 100644 --- a/c++/mpi/utils.hpp +++ b/c++/mpi/utils.hpp @@ -25,7 +25,6 @@ #include #include -#include namespace mpi { @@ -34,27 +33,6 @@ namespace mpi { * @{ */ - namespace detail { - - // Helper struct to get the regular type of a type. - template struct _regular { - using type = T; - }; - - // Spezialization of _regular for types with a `regular_type` type alias. - template struct _regular> { - using type = typename T::regular_type; - }; - - } // namespace detail - - /** - * @ingroup utilities - * @brief Type trait to get the regular type of a type. - * @tparam T Type to check. - */ - template using regular_t = typename detail::_regular>::type; - /** * @brief Check the success of an MPI call. * @details It checks if the given error code returned by an MPI routine is equal to `MPI_SUCCESS`. If it isn't, it @@ -74,13 +52,6 @@ namespace mpi { if (errcode != MPI_SUCCESS) throw std::runtime_error("MPI error " + std::to_string(errcode) + " in MPI routine " + mpi_routine); } - /** - * @brief A concept that checks if a range type is contiguous and sized. - * @tparam R Range type. 
- */ - template - concept contiguous_sized_range = std::ranges::contiguous_range && std::ranges::sized_range; - /** @} */ } // namespace mpi diff --git a/c++/mpi/vector.hpp b/c++/mpi/vector.hpp index bfd58535..91de1a29 100644 --- a/c++/mpi/vector.hpp +++ b/c++/mpi/vector.hpp @@ -16,7 +16,7 @@ /** * @file - * @brief Provides an MPI broadcast, reduce, scatter and gather for std::vector. + * @brief Provides an MPI broadcast, reduce, scatter and gather for `std::vector`. */ #pragma once @@ -28,6 +28,8 @@ #include +#include +#include #include namespace mpi { @@ -38,96 +40,108 @@ namespace mpi { */ /** - * @brief Implementation of an MPI broadcast for a std::vector. + * @brief Implementation of an MPI broadcast for a `std::vector`. * * @details It first broadcasts the size of the vector from the root process to all other processes, then resizes the * vector on all non-root processes and calls mpi::broadcast_range with the (resized) input vector. * * @tparam T Value type of the vector. - * @param v std::vector to broadcast. + * @param v `std::vector` to broadcast. * @param c mpi::communicator. * @param root Rank of the root process. */ template void mpi_broadcast(std::vector &v, communicator c = {}, int root = 0) { - auto bsize = v.size(); - broadcast(bsize, c, root); - if (c.rank() != root) v.resize(bsize); + auto count = v.size(); + broadcast(count, c, root); + if (c.rank() != root) v.resize(count); broadcast_range(v, c, root); } /** - * @brief Implementation of an in-place MPI reduce for a std::vector. + * @brief Implementation of an MPI reduce for a `std::vector`. * - * @details It simply calls mpi::reduce_in_place_range with the given input vector. + * @details It first constructs the output vector with its value type equal to the return type of + * `reduce(std::declval())`. On receiving ranks, the output vector is then resized to the size of the input vector. + * On non-receiving ranks, the output vector is always empty. 
+ * + * It calls mpi::reduce_range with the input and constructed output vector. * * @tparam T Value type of the vector. - * @param v std::vector to reduce. + * @param v `std::vector` to reduce. * @param c mpi::communicator. * @param root Rank of the root process. * @param all Should all processes receive the result of the reduction. * @param op `MPI_Op` used in the reduction. + * @return `std::vector` containing the result of the reduction. */ - template void mpi_reduce_in_place(std::vector &v, communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) { - reduce_in_place_range(v, c, root, all, op); + template auto mpi_reduce(std::vector const &v, communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) { + using value_type = std::remove_cvref_t()))>; + std::vector res(c.rank() == root || all ? v.size() : 0); + reduce_range(v, res, c, root, all, op); + return res; } /** - * @brief Implementation of an MPI reduce for a std::vector. + * @brief Implementation of an MPI reduce for a `std::vector` that reduces directly into a given output vector. * - * @details It simply calls mpi::reduce_range with the given input vector and an empty vector of the same size. + * @details It first resizes the output vector to the size of the input vector on receiving ranks and then calls + * mpi::reduce_range with the input and (resized) output vector. * - * @tparam T Value type of the vector. - * @param v std::vector to reduce. + * @tparam T1 Value type of the vector to be reduced. + * @tparam T2 Value type of the vector to be reduced into. + * @param v_in `std::vector` to reduce. + * @param v_out `std::vector` to reduce into. * @param c mpi::communicator. * @param root Rank of the root process. * @param all Should all processes receive the result of the reduction. * @param op `MPI_Op` used in the reduction. - * @return std::vector containing the result of each individual reduction. 
*/ - template - auto mpi_reduce(std::vector const &v, communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) { - std::vector> res(c.rank() == root || all ? v.size() : 0); - reduce_range(v, res, c, root, all, op); - return res; + template + void mpi_reduce_into(std::vector const &v_in, std::vector &v_out, communicator c = {}, int root = 0, bool all = false, + MPI_Op op = MPI_SUM) { + if ((c.rank() == root || all) && v_out.size() != v_in.size()) v_out.resize(v_in.size()); + reduce_range(v_in, v_out, c, root, all, op); } /** - * @brief Implementation of an MPI scatter for a std::vector. + * @brief Implementation of an MPI scatter for a `std::vector` that scatters directly into an existing output vector. * - * @details It first broadcasts the size of the vector from the root process to all other processes and then calls - * mpi::scatter_range. + * @details It first broadcasts the size of the input vector from the root process to all other processes and + * resizes the output vector if it has not the correct size. The size of the output vector is determined with + * mpi::chunk_length. Then mpi::scatter_range is called with the input and (resized) output vector. * * @tparam T Value type of the vector. - * @param v std::vector to scatter. + * @param v_in `std::vector` to scatter. + * @param v_out `std::vector` to scatter into. * @param c mpi::communicator. * @param root Rank of the root process. - * @return std::vector containing the result of the scatter operation. 
*/ - template auto mpi_scatter(std::vector const &v, communicator c = {}, int root = 0) { - auto bsize = v.size(); - broadcast(bsize, c, root); - std::vector res(chunk_length(bsize, c.size(), c.rank())); - scatter_range(v, res, bsize, c, root); - return res; + template void mpi_scatter_into(std::vector const &v_in, std::vector &v_out, communicator c = {}, int root = 0) { + auto scatter_size = static_cast(v_in.size()); + broadcast(scatter_size, c, root); + auto const recvcount = chunk_length(scatter_size, c.size(), c.rank()); + if (v_out.size() != recvcount) v_out.resize(recvcount); + scatter_range(v_in, v_out, scatter_size, c, root); } /** - * @brief Implementation of an MPI gather for a std::vector. + * @brief Implementation of an MPI gather for a `std::vector` that gathers directly into an existing output vector. * - * @details It first all-reduces the sizes of the input vectors from all processes and then calls mpi::gather_range. + * @details It first all-reduces the sizes of the input vectors from all processes. On receiving ranks, the output + * vector is resized to the reduced size in case it has not the correct size. On non-receiving ranks, the output + * vector is always unmodified. Then mpi::gather_range with the input and (resized) output vector is called. * * @tparam T Value type of the vector. - * @param v std::vector to gather. + * @param v_in `std::vector` to gather. + * @param v_out `std::vector` to gather into. * @param c mpi::communicator. * @param root Rank of the root process. * @param all Should all processes receive the result. - * @return std::vector containing the result of the gather operation. */ - template auto mpi_gather(std::vector const &v, communicator c = {}, int root = 0, bool all = false) { - long bsize = mpi::all_reduce(v.size(), c); - std::vector res(c.rank() == root || all ? 
bsize : 0); - gather_range(v, res, bsize, c, root, all); - return res; + template void mpi_gather_into(std::vector const &v_in, std::vector &v_out, communicator c = {}, int root = 0, bool all = false) { + auto const gather_size = mpi::all_reduce(v_in.size(), c); + if ((c.rank() == root || all) && v_out.size() != gather_size) v_out.resize(gather_size); + gather_range(v_in, v_out, c, root, all); } /** @} */ diff --git a/doc/DoxygenLayout.xml b/doc/DoxygenLayout.xml index 48ea50bb..031fd722 100644 --- a/doc/DoxygenLayout.xml +++ b/doc/DoxygenLayout.xml @@ -22,6 +22,7 @@ + @@ -50,17 +51,11 @@ - - - - - - - + diff --git a/doc/documentation.md b/doc/documentation.md index a73e2696..515ab404 100644 --- a/doc/documentation.md +++ b/doc/documentation.md @@ -35,17 +35,7 @@ Furthermore, it offers tools to simplify the creation of custom MPI operations u ## Collective MPI communication -The following generic collective communications are defined in @ref coll_comm "Collective MPI communication": - -* @ref mpi::all_gather "all_gather" -* @ref mpi::all_reduce "all_reduce" -* @ref mpi::all_reduce_in_place "all_reduce_in_place" -* @ref mpi::broadcast "broadcast" -* @ref mpi::gather "gather" -* @ref mpi::reduce "reduce" -* @ref mpi::reduce_in_place "reduce_in_place" -* @ref mpi::scatter "scatter" - +**mpi** provides several generic @ref coll_comm "Collective MPI communication". They offer a much simpler interface than their MPI C library analogs. For example, the following broadcasts a `std::vector` from the process with rank 0 to all others: @@ -61,18 +51,11 @@ MPI_Bcast(vec.data(), static_cast(vec.size()), MPI_DOUBLE, 0, MPI_COMM_WORL Under the hood, the generic mpi::broadcast implementation calls the specialized @ref "mpi::mpi_broadcast(std::vector< T >&, mpi::communicator, int)". -The other generic functions are implemented in the same way. -See the "Functions" section in @ref coll_comm to check which datatypes are supported out of the box. 
+Other generic functions in **mpi** work similarly. +See the "Functions" section in @ref coll_comm to check which datatypes and MPI operations are supported out of the box. In case your datatype is not supported, you are free to provide your own specialization. -Furthermore, there are several functions to simplify communicating generic, contiguous ranges: -- mpi::broadcast_range, -- mpi::gather_range, -- mpi::reduce_in_place_range, -- mpi::reduce_range and -- mpi::scatter_range. - ## Lazy MPI communication @ref mpi_lazy can be used to provied collective MPI communication for lazy expression types. diff --git a/doc/ex4.md b/doc/ex4.md new file mode 100644 index 00000000..7d619d5b --- /dev/null +++ b/doc/ex4.md @@ -0,0 +1,63 @@ +@page ex4 Example 4: Provide custom spezializations + +[TOC] + +In this example, we show how to write a specialized `mpi_reduce_into` for a custom type. + +```cpp +#include +#include +#include + +// Custom type. +class foo { + public: + // Constructor. + foo(int x = 5) : x_(x) {} + + // Get the value stored in the class. + int x() const { return x_; } + + // Specialization of mpi_reduce_into for the custom type. 
+ friend void mpi_reduce_into(foo const &f_in, foo &f_out, mpi::communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) { + mpi::reduce_into(f_in.x_, f_out.x_, c, root, all, op); + } + + private: + int x_; +}; + +int main(int argc, char *argv[]) { + // initialize MPI environment + mpi::environment env(argc, argv); + mpi::communicator world; + + // create a vector of foo objects + std::vector vec {foo{1}, foo{2}, foo{3}, foo{4}, foo{5}}; + + // reduce the vector of foo objects + auto result = mpi::reduce(vec, world); + + // print the result on rank 0 + if (world.rank() == 0) { + std::cout << "Reduced vector: "; + for (auto const &f : result) std::cout << f.x() << " "; + std::cout << "\n"; + } +} +``` + +Output (running with `-n 4`): + +``` +Reduced vector: 4 8 12 16 20 +``` + +Note that by providing a simple `mpi_reduce_into` for our custom `foo` type, we are able to reduce a `std::vector` of +`foo` objects without any additional work. + +Under the hood, each `foo` object is reduced spearately using the above specialization. +For large amounts of data or in performance critical code sections, this might not be desired. +In such a case, it is usally better to make the type MPI compatible such that the reduction can be done with a single +call to MPI C library. +See @ref ex3 for more details. diff --git a/doc/examples.md b/doc/examples.md index 94c15688..2b85e2a2 100644 --- a/doc/examples.md +++ b/doc/examples.md @@ -5,13 +5,16 @@ - @ref ex1 "Example 1: Hello world!" - @ref ex2 "Example 2: Use monitor to communicate errors" - @ref ex3 "Example 3: Custom type and operator" +- @ref ex4 "Example 4: Provide custom spezializations" @section compiling Compiling the examples -All examples have been compiled on a MacBook Pro with an Apple M2 Max chip and [open-mpi](https://www.open-mpi.org/) 4.1.5. -We further used clang 16.0.6 together with cmake 3.27.2. 
+All examples have been compiled on a MacBook Pro with an Apple M2 Max chip and [open-mpi](https://www.open-mpi.org/) +5.0.1. +We further used clang 19.1.7 together with cmake 3.31.5. -Assuming that the actual example code is in a file `main.cpp`, the following generic `CMakeLists.txt` should work for all examples: +Assuming that the actual example code is in a file `main.cpp`, the following generic `CMakeLists.txt` should work for +all examples: ```cmake cmake_minimum_required(VERSION 3.20) @@ -28,7 +31,7 @@ include (FetchContent) FetchContent_Declare( mpi GIT_REPOSITORY https://github.com/TRIQS/mpi.git - GIT_TAG 1.2.x + GIT_TAG 1.3.x ) FetchContent_MakeAvailable(mpi) diff --git a/doc/groups.dox b/doc/groups.dox index 8675dcfb..0e77a743 100644 --- a/doc/groups.dox +++ b/doc/groups.dox @@ -51,17 +51,46 @@ * @brief Generic and specialized implementations for a subset of collective MPI communications (broadcast, reduce, * gather, scatter). * - * @details The generic functions (mpi::broadcast, mpi::reduce, mpi::scatter, ...) call their more specialized - * counterparts (e.g. mpi::mpi_broadcast, mpi::mpi_reduce, mpi::mpi_scatter, ...). - * - * **mpi** provides (some) implementations for - * - scalar types that have a corresponding mpi::mpi_type, - * - `std::vector` and `std::array` types with MPI compatible value types, - * - `std::string` and - * - `std::pair`. - * - * Furthermore, there are several functions to simplify communicating generic, contiguous ranges: mpi::broadcast_range, - * mpi::gather_range, mpi::reduce_in_place_range, mpi::reduce_range and mpi::scatter_range. + * @details **mpi** provides several generic collective communications routines as well as specializations for certain + * common types. 
The generic functions usually simply forward the call to one of the specializations (`mpi_broadcast`, + * `mpi_gather`, `mpi_gather_into`, `mpi_reduce`, `mpi_reduce_into`, `mpi_scatter` or `mpi_scatter_into`) using ADL but + * can also perform some additional checks. It is therefore recommended to always use the generic versions when + * possible. + * + * Here is a short overview of the available generic functions: + * - mpi::broadcast: Calls the specialization `mpi_broadcast`. + * - mpi::gather: Calls the specialization `mpi_gather` if it is implemented. Otherwise, it calls mpi::gather_into with + * a default constructed output object. + * - mpi::gather_into: Calls the specialization `mpi_gather_into`. + * - mpi::reduce: Calls the specialization `mpi_reduce` if it is implemented. Otherwise, it calls mpi::reduce_into with + * a default constructed output object. + * - mpi::reduce_in_place: Calls the specialization `mpi_reduce_into` with the same input and output object. + * - mpi::reduce_into: Calls the specialization `mpi_reduce_into`. + * - mpi::scatter: Calls the specialization `mpi_scatter` if it is implemented. Otherwise, it calls mpi::scatter_into + * with a default constructed output object. + * - mpi::scatter_into: Calls the specialization `mpi_scatter_into`. + * + * In case, all processes should receive the result of the MPI operation, one can use the convenience functions + * mpi::all_gather, mpi::all_gather_into, mpi::all_reduce, mpi::all_reduce_in_place or mpi::all_reduce_into. They + * forward the given arguments to their "non-all" counterparts with the `all` argument set to true. + * + * **mpi** provides various specializations for several types. For example, + * - for MPI compatible types, i.e. 
for types that have a corresponding mpi::mpi_type, it provides an + * @ref "mpi::mpi_broadcast(T &x, mpi::communicator, int)" "mpi_broadcast", + * @ref "mpi::mpi_reduce(T const &, mpi::communicator, int, bool, MPI_Op)" "mpi_reduce", + * @ref "mpi::mpi_reduce_into(T const &, T &, mpi::communicator, int, bool, MPI_Op)" "mpi_reduce_into", + * @ref "mpi::mpi_gather(T const &, mpi::communicator, int, bool)" "mpi_gather" and an + * @ref "mpi::mpi_gather_into(T const &, R &&, mpi::communicator, int, bool)" "mpi_gather_into". + * - for strings, it provides an @ref "mpi::mpi_broadcast(std::string &, mpi::communicator, int)" "mpi_broadcast" + * and an @ref "mpi::mpi_gather_into(std::string const &, std::string &, mpi::communicator, int, bool)" + * "mpi_gather_into". + * + * Users are encouraged to implement their own specializations for their custom types or in case a specialization is + * missing (see e.g. @ref ex4). + * + * Furthermore, there are several functions to simplify communicating (contiguous) ranges: mpi::broadcast_range, + * mpi::gather_range, mpi::reduce_range and mpi::scatter_range. Some of these range functions are more generic than + * others. Please check the documentation of the specific function for more details. */ /** diff --git a/test/c++/custom_types.hpp b/test/c++/custom_types.hpp new file mode 100644 index 00000000..f803ee7d --- /dev/null +++ b/test/c++/custom_types.hpp @@ -0,0 +1,69 @@ +// Copyright (c) 2022-2024 Simons Foundation +// Copyright (c) 2022 Hugo U.R. Strand +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// Authors: Thomas Hahn, Hugo U.R. Strand + +#pragma once + +#include +#include + +#include +#include +#include + +// Custom type which is MPI compatible. +struct mpi_t { + long a{0}; + bool operator==(const mpi_t &) const = default; + mpi_t operator+(mpi_t x) const { + x.a += a; + return x; + } +}; + +// Tie the data (to make it MPI compatible). +inline auto tie_data(mpi_t const &x) { return std::tie(x.a); } + +// Custom type which is not MPI compatible but has specialized mpi_xxx implementations. +struct non_mpi_t { + int a{1}; + bool operator==(const non_mpi_t &) const = default; +}; + +// Specialize mpi_broadcast for non_mpi_t. +void mpi_broadcast(non_mpi_t &x, mpi::communicator c = {}, int root = 0) { broadcast(x.a, c, root); } + +// Specialize mpi_reduce_into for non_mpi_t. +void mpi_reduce_into(non_mpi_t const &in, non_mpi_t &out, mpi::communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) { + mpi::reduce_into(in.a, out.a, c, root, all, op); +} + +// Specialize mpi_gather for non_mpi_t. +std::vector mpi_gather(non_mpi_t const &x, mpi::communicator c = {}, int root = 0, bool all = false) { + std::vector a_vec = gather(x.a, c, root, all); + std::vector res{}; + if (c.rank() == root || all) { + res.resize(c.size()); + std::ranges::transform(a_vec, res.begin(), [](int a) { return non_mpi_t{a}; }); + } + return res; +} + +// Specialize mpi_gather_into for non_mpi_t. 
+void mpi_gather_into(non_mpi_t const &x, auto &&rg, mpi::communicator c = {}, int root = 0, bool all = false) { + auto vec = mpi_gather(x, c, root, all); + if (c.rank() == root || all) std::ranges::copy(vec, std::ranges::begin(rg)); +} diff --git a/test/c++/mpi_array.cpp b/test/c++/mpi_array.cpp deleted file mode 100644 index 884d0c80..00000000 --- a/test/c++/mpi_array.cpp +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright (c) 2020-2024 Simons Foundation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0.txt -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -// Authors: Thomas Hahn, Nils Wentzell - -#include "./non_mpi_t.hpp" - -#include -#include -#include - -#include -#include -#include - -TEST(MPI, ArrayBroadcastMPIType) { - // broadcast an array with an MPI type - mpi::communicator world; - std::array arr{}; - if (world.rank() == 0) std::iota(arr.begin(), arr.end(), 0); - mpi::broadcast(arr, world); - for (int i = 0; i < 5; ++i) EXPECT_EQ(arr[i], i); -} - -TEST(MPI, ArrayBroadcastTypeWithSpezializedMPIBroadcast) { - // broadcast an array with a type that has a specialized mpi_broadcast - mpi::communicator world; - std::array arr{}; - if (world.rank() == 0) { - for (int i = 0; i < 5; ++i) arr[i].a = i; - } - mpi::broadcast(arr, world); - for (int i = 0; i < 5; ++i) EXPECT_EQ(arr[i].a, i); -} - -TEST(MPI, ArrayReduceInPlaceMPIType) { - // in-place reduce an array with an MPI type - mpi::communicator world; - std::array arr{0, 1, 2, 3, 4}; - mpi::reduce_in_place(arr, world); - if (world.rank() == 0) - for (int i = 0; i < 5; ++i) EXPECT_EQ(arr[i], i * world.size()); - else - for (int i = 0; i < 5; ++i) EXPECT_EQ(arr[i], i); - - // in-place allreduce an array with an MPI type - std::iota(arr.begin(), arr.end(), 0); - mpi::all_reduce_in_place(arr, world); - for (int i = 0; i < 5; ++i) EXPECT_EQ(arr[i], i * world.size()); -} - -TEST(MPI, ArrayReduceInPlaceTypeWithSpezializedMPIReduceInPlace) { - // in-place reduce an array with a type that has a specialized mpi_reduce_in_place - mpi::communicator world; - std::array arr{}; - for (int i = 0; i < 5; ++i) arr[i].a = i; - mpi::reduce_in_place(arr, world); - if (world.rank() == 0) - for (int i = 0; i < 5; ++i) EXPECT_EQ(arr[i].a, i * world.size()); - else - for (int i = 0; i < 5; ++i) EXPECT_EQ(arr[i].a, i); - - // in-place allreduce an array with a type that has a specialized mpi_reduce_in_place - for (int i = 0; i < 5; ++i) arr[i].a = i; - mpi::all_reduce_in_place(arr, world); - for (int i = 0; i < 5; ++i) EXPECT_EQ(arr[i].a, i * world.size()); -} - -TEST(MPI, 
ArrayReduceMPIType) { - // reduce an array with complex numbers - mpi::communicator world; - using arr_type = std::array, 7>; - const int size = 7; - arr_type arr{}; - for (int i = 0; i < size; ++i) arr[i] = std::complex(i, -i); - auto arr_reduced = mpi::reduce(arr, world); - if (world.rank() == 0) - for (int i = 0; i < size; ++i) EXPECT_EQ(arr_reduced[i], std::complex(i * world.size(), -i * world.size())); - else - EXPECT_EQ(arr_reduced, arr_type{}); - - // allreduce an array with complex numbers - auto arr_reduced_all = mpi::all_reduce(arr, world); - for (int i = 0; i < size; ++i) EXPECT_EQ(arr_reduced_all[i], std::complex(i * world.size(), -i * world.size())); -} - -TEST(MPI, EmptyArrayReduce) { - // reduce an empty array - mpi::communicator world; - std::array arr{}; - std::ignore = mpi::reduce(arr, world); -} - -MPI_TEST_MAIN; diff --git a/test/c++/mpi_broadcast.cpp b/test/c++/mpi_broadcast.cpp new file mode 100644 index 00000000..c7b91243 --- /dev/null +++ b/test/c++/mpi_broadcast.cpp @@ -0,0 +1,52 @@ +// Copyright (c) 2022-2024 Simons Foundation +// Copyright (c) 2022 Hugo U.R. Strand +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Authors: Thomas Hahn, Hugo U.R. Strand + +#include "./custom_types.hpp" + +#include +#include + +#include +#include +#include + +// Test broadcasting a single value/object. 
+template void test_broadcast(T root_value) { + mpi::communicator world; + for (int root = 0; root < world.size(); ++root) { + T bcast_value{}; + if (world.rank() == root) bcast_value = root_value; + mpi::broadcast(bcast_value, world, root); + EXPECT_EQ(bcast_value, root_value); + } +} + +TEST(MPI, BroadcastInteger) { test_broadcast(42); } + +TEST(MPI, BroadcastComplex) { test_broadcast(std::complex{1.0, 2.0}); } + +TEST(MPI, BroadcastCustomMPIType) { test_broadcast(mpi_t{42}); } + +TEST(MPI, BroadcastCustomNonMPIType) { test_broadcast(non_mpi_t{42}); } + +TEST(MPI, BroadcastString) { test_broadcast(std::string{"Hello World"}); } + +TEST(MPI, BroadcastPairOfStringAndComplex) { test_broadcast(std::make_pair(std::string{"Hello"}, std::complex{1.0, 2.0})); } + +TEST(MPI, BroadcastPairOfCustomMPITypeAndCustomNonMPIType) { test_broadcast(std::make_pair(mpi_t{42}, non_mpi_t{-5})); } + +MPI_TEST_MAIN; diff --git a/test/c++/mpi_broadcast_array.cpp b/test/c++/mpi_broadcast_array.cpp new file mode 100644 index 00000000..e5a50dd6 --- /dev/null +++ b/test/c++/mpi_broadcast_array.cpp @@ -0,0 +1,82 @@ +// Copyright (c) 2022-2024 Simons Foundation +// Copyright (c) 2022 Hugo U.R. Strand +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Authors: Thomas Hahn, Hugo U.R. Strand + +#include "./custom_types.hpp" + +#include +#include + +#include +#include +#include +#include +#include + +// Check if two ranges are equal. 
+void expect_range_eq(auto &&rg1, auto &&rg2) { + EXPECT_EQ(std::ranges::size(rg1), std::ranges::size(rg2)); + auto it2 = std::ranges::begin(rg2); + for (auto &&a : rg1) { EXPECT_EQ(a, *it2++); } +} + +// Test broadcasting arrays. +template void test_broadcast_array(std::array const &root_values) { + mpi::communicator world; + auto arr = root_values; + + // broadcast an array from different roots + for (int root = 0; root < world.size(); ++root) { + arr = {}; + if (world.rank() == root) arr = root_values; + mpi::broadcast(arr, world, root); + expect_range_eq(arr, root_values); + } + + // broadcast an empty array + std::array empty_arr{}; + mpi::broadcast(empty_arr, world); + expect_range_eq(arr, root_values); +} + +TEST(MPI, BroadcastIntegerArray) { test_broadcast_array(std::array{1, 2, 3, 4, 5}); } + +TEST(MPI, BroadcastComplexArray) { + using namespace std::complex_literals; + test_broadcast_array(std::array, 5>{1.0 - 1.0i, 2.0 - 2.0i, 3.0 - 3.0i, 4.0 - 4.0i, 5.0 - 5.0i}); +} + +TEST(MPI, BroadcastCustomMPITypeArray) { test_broadcast_array(std::array{mpi_t{1}, mpi_t{2}, mpi_t{3}, mpi_t{4}, mpi_t{5}}); } + +TEST(MPI, BroadcastCustomNonMPITypeArray) { + test_broadcast_array(std::array{non_mpi_t{1}, non_mpi_t{2}, non_mpi_t{3}, non_mpi_t{4}, non_mpi_t{5}}); +} + +TEST(MPI, BroadcastStringArray) { test_broadcast_array(std::array{"Hello", "World", "MPI", "Broadcast", "Array"}); } + +TEST(MPI, BroadcastPairArray) { + test_broadcast_array(std::array, 5>{{{1, "Hello"}, {2, "World"}, {3, "MPI"}, {4, "Broadcast"}, {5, "Array"}}}); +} + +TEST(MPI, BroadcastArrayOfDoubleArrays) { + std::array, 5> root_values{}; + for (int i = 0; i < 5; ++i) { + for (int j = 0; j < 2; ++j) root_values[i][j] = i * 2 + j; + } + test_broadcast_array(root_values); +} + +MPI_TEST_MAIN; diff --git a/test/c++/mpi_broadcast_range.cpp b/test/c++/mpi_broadcast_range.cpp new file mode 100644 index 00000000..bde14643 --- /dev/null +++ b/test/c++/mpi_broadcast_range.cpp @@ -0,0 +1,89 @@ +// Copyright (c) 
2022-2024 Simons Foundation +// Copyright (c) 2022 Hugo U.R. Strand +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Authors: Thomas Hahn, Hugo U.R. Strand + +#include "./custom_types.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include + +// Check if two ranges are equal. +void expect_range_eq(auto &&rg1, auto &&rg2) { + EXPECT_EQ(std::ranges::size(rg1), std::ranges::size(rg2)); + auto it2 = std::ranges::begin(rg2); + for (auto &&a : rg1) { EXPECT_EQ(a, *it2++); } +} + +// Test broadcasting a range of objects. 
+template void test_broadcast_range(std::array root_values) { + mpi::communicator world; + std::array def_arr{}; + def_arr.fill(root_values[0]); + + // broadcast a contiguous range from different roots + auto arr = root_values; + for (int root = 0; root < world.size(); ++root) { + if (world.rank() == root) { + arr = root_values; + mpi::broadcast_range(std::span{arr.begin() + 2, 3}, world, root); + expect_range_eq(arr, root_values); + } else { + arr = def_arr; + mpi::broadcast_range(std::span{arr.begin(), 3}, world, root); + expect_range_eq(std::span{arr.begin(), 3}, std::span{root_values.begin() + 2, 3}); + expect_range_eq(std::span{arr.begin() + 3, 2}, std::span{def_arr.begin() + 3, 2}); + } + } + + // broadcast a view on a non-contiguous list + std::list list(def_arr.begin(), def_arr.end()); + if (world.rank() == 0) list.assign(root_values.begin(), root_values.end()); + mpi::broadcast_range(std::ranges::drop_view(list, 2), world); + if (world.rank() == 0) { + expect_range_eq(list, root_values); + } else { + expect_range_eq(std::ranges::drop_view(list, 2), std::ranges::drop_view(root_values, 2)); + expect_range_eq(std::ranges::take_view(list, 2), std::ranges::take_view(def_arr, 2)); + } +} + +TEST(MPI, BroadcastIntegerRange) { test_broadcast_range(std::array{1, 2, 3, 4, 5}); } + +TEST(MPI, BroadcastComplexRange) { + using namespace std::complex_literals; + test_broadcast_range(std::array, 5>{1.0 - 1.0i, 2.0 - 2.0i, 3.0 - 3.0i, 4.0 - 4.0i, 5.0 - 5.0i}); +} + +TEST(MPI, BroadcastCustomMPITypeRange) { test_broadcast_range(std::array{mpi_t{1}, mpi_t{2}, mpi_t{3}, mpi_t{4}, mpi_t{5}}); } + +TEST(MPI, BroadcastCustomNonMPITypeRange) { + test_broadcast_range(std::array{non_mpi_t{1}, non_mpi_t{2}, non_mpi_t{3}, non_mpi_t{4}, non_mpi_t{5}}); +} + +TEST(MPI, BroadcastStringRange) { test_broadcast_range(std::array{"Hello", "World", "MPI", "Broadcast", "Array"}); } + +TEST(MPI, BroadcastPairRange) { + test_broadcast_range(std::array, 5>{{{1, "Hello"}, {2, "World"}, {3, 
"MPI"}, {4, "Broadcast"}, {5, "Array"}}}); +} + +MPI_TEST_MAIN; diff --git a/test/c++/mpi_broadcast_vector.cpp b/test/c++/mpi_broadcast_vector.cpp new file mode 100644 index 00000000..92ba8eef --- /dev/null +++ b/test/c++/mpi_broadcast_vector.cpp @@ -0,0 +1,88 @@ +// Copyright (c) 2022-2024 Simons Foundation +// Copyright (c) 2022 Hugo U.R. Strand +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Authors: Thomas Hahn, Hugo U.R. Strand + +#include "./custom_types.hpp" + +#include +#include + +#include +#include +#include +#include +#include + +// Check if two ranges are equal. +void expect_range_eq(auto &&rg1, auto &&rg2) { + EXPECT_EQ(std::ranges::size(rg1), std::ranges::size(rg2)); + auto it2 = std::ranges::begin(rg2); + for (auto &&a : rg1) { EXPECT_EQ(a, *it2++); } +} + +// Test broadcasting vectors. 
+template void test_broadcast_vector(std::vector const &root_values) { + mpi::communicator world; + auto vec = root_values; + + // broadcast a vector from different roots + for (int root = 0; root < world.size(); ++root) { + vec.clear(); + if (world.rank() == root) vec = root_values; + mpi::broadcast(vec, world, root); + expect_range_eq(vec, root_values); + } + + // broadcast an empty vector + if (world.rank() == 0) { + vec.clear(); + mpi::broadcast(vec, world); + EXPECT_TRUE(vec.empty()); + } else { + vec = root_values; + mpi::broadcast(vec, world); + EXPECT_TRUE(vec.empty()); + } +} + +TEST(MPI, BroadcastIntegerVector) { test_broadcast_vector(std::vector{1, 2, 3, 4, 5}); } + +TEST(MPI, BroadcastComplexVector) { + using namespace std::complex_literals; + test_broadcast_vector(std::vector>{1.0 - 1.0i, 2.0 - 2.0i, 3.0 - 3.0i, 4.0 - 4.0i, 5.0 - 5.0i}); +} + +TEST(MPI, BroadcastCustomMPITypeVector) { test_broadcast_vector(std::vector{mpi_t{1}, mpi_t{2}, mpi_t{3}, mpi_t{4}, mpi_t{5}}); } + +TEST(MPI, BroadcastCustomNonMPITypeVector) { + test_broadcast_vector(std::vector{non_mpi_t{1}, non_mpi_t{2}, non_mpi_t{3}, non_mpi_t{4}, non_mpi_t{5}}); +} + +TEST(MPI, BroadcastStringVector) { test_broadcast_vector(std::vector{"Hello", "World", "MPI", "Broadcast", "Array"}); } + +TEST(MPI, BroadcastPairVector) { + test_broadcast_vector(std::vector>{{{1, "Hello"}, {2, "World"}, {3, "MPI"}, {4, "Broadcast"}, {5, "Array"}}}); +} + +TEST(MPI, BroadcastVectorOfDoubleVectors) { + std::vector> root_values(5, std::vector(2)); + for (int i = 0; i < 5; ++i) { + for (int j = 0; j < 2; ++j) root_values[i][j] = i * 2 + j; + } + test_broadcast_vector(root_values); +} + +MPI_TEST_MAIN; diff --git a/test/c++/mpi_cplx.cpp b/test/c++/mpi_cplx.cpp deleted file mode 100644 index 13c14451..00000000 --- a/test/c++/mpi_cplx.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) 2022-2024 Simons Foundation -// Copyright (c) 2022 Hugo U.R. 
Strand -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0.txt -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Authors: Thomas Hahn, Hugo U.R. Strand - -#include -#include - -#include - -TEST(MPI, ComplexBroadcast) { - // broadcast a complex number - mpi::communicator world; - - std::complex cplx; - if (world.rank() == 0) cplx = std::complex(1., 2.); - - mpi::broadcast(cplx); - - EXPECT_EQ(cplx, std::complex(1., 2.)); -} - -MPI_TEST_MAIN; diff --git a/test/c++/mpi_gather.cpp b/test/c++/mpi_gather.cpp new file mode 100644 index 00000000..4d599285 --- /dev/null +++ b/test/c++/mpi_gather.cpp @@ -0,0 +1,127 @@ +// Copyright (c) 2022-2024 Simons Foundation +// Copyright (c) 2022 Hugo U.R. Strand +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Authors: Thomas Hahn, Hugo U.R. Strand + +#include "./custom_types.hpp" + +#include +#include + +#include +#include +#include +#include + +// Check if two ranges are equal. 
+void expect_range_eq(auto &&rg1, auto &&rg2) { + EXPECT_EQ(std::ranges::size(rg1), std::ranges::size(rg2)); + auto it2 = std::ranges::begin(rg2); + for (auto &&a : rg1) { EXPECT_EQ(a, *it2++); } +} + +// Test gathering single values/objects. +template void test_gather(std::vector result) { + mpi::communicator world; + + // gather from different roots + for (int root = 0; root < world.size(); ++root) { + // gather single objects into a vector + auto vec = mpi::gather(result[world.rank()], world, root); + if (world.rank() == root) + expect_range_eq(vec, result); + else + EXPECT_TRUE(vec.empty()); + + // gather single objects into an existing vector + if (world.rank() == root) { + vec.assign(world.size(), T{0}); + mpi::gather_into(result[world.rank()], vec, world, root); + expect_range_eq(vec, result); + } else { + vec.clear(); + mpi::gather_into(result[world.rank()], vec, world, root); + EXPECT_TRUE(vec.empty()); + } + } + + // allgather single objects into a vector + auto vec = mpi::all_gather(result[world.rank()], world); + expect_range_eq(vec, result); + + // allgather single objects into an existing vector + vec.assign(world.size(), T{0}); + mpi::all_gather_into(result[world.rank()], vec, world); + expect_range_eq(vec, result); +} + +TEST(MPI, GatherInteger) { + mpi::communicator world; + std::vector result(world.size()); + for (int i = 0; i < world.size(); ++i) result[i] = i + 1; + test_gather(result); +} + +TEST(MPI, GatherComplex) { + mpi::communicator world; + std::vector> result(world.size()); + for (int i = 0; i < world.size(); ++i) result[i] = std::complex{i + 1.0, -(i + 1.0)}; + test_gather(result); +} + +TEST(MPI, GatherCustomMPIType) { + mpi::communicator world; + std::vector result(world.size()); + for (int i = 0; i < world.size(); ++i) result[i] = mpi_t{i + 1}; + test_gather(result); +} + +TEST(MPI, GatherCustomNonMPIType) { + mpi::communicator world; + std::vector result(world.size()); + for (int i = 0; i < world.size(); ++i) result[i] = non_mpi_t{i 
+ 1}; + test_gather(result); +} + +// Test gathering a string. +TEST(MPI, GatherString) { + mpi::communicator world; + std::string str{}, result{}; + for (int i = 0; i < world.size(); ++i) { + for (int j = 0; j < i + 1; ++j) result += "a"; + result += std::to_string(i); + } + for (int i = 0; i < world.rank() + 1; ++i) str += "a"; + str += std::to_string(world.rank()); + + // gather strings + for (int root = 0; root < world.size(); ++root) { + auto str_gathered = mpi::gather(str, world, root); + if (world.rank() == root) + EXPECT_EQ(str_gathered, result); + else + EXPECT_TRUE(str_gathered.empty()); + } + + // allgather strings + auto str_gathered = mpi::all_gather(str); + EXPECT_EQ(str_gathered, result); + + // allgather empty strings + auto empty_str = mpi::all_gather(std::string{}); + EXPECT_TRUE(empty_str.empty()); +} + +MPI_TEST_MAIN; diff --git a/test/c++/mpi_gather_range.cpp b/test/c++/mpi_gather_range.cpp new file mode 100644 index 00000000..2768db7b --- /dev/null +++ b/test/c++/mpi_gather_range.cpp @@ -0,0 +1,88 @@ +// Copyright (c) 2022-2024 Simons Foundation +// Copyright (c) 2022 Hugo U.R. Strand +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Authors: Thomas Hahn, Hugo U.R. Strand + +#include "./custom_types.hpp" + +#include +#include + +#include +#include +#include + +// Check if two ranges are equal. 
+void expect_range_eq(auto &&rg1, auto &&rg2) { + EXPECT_EQ(std::ranges::size(rg1), std::ranges::size(rg2)); + auto it2 = std::ranges::begin(rg2); + for (auto &&a : rg1) { EXPECT_EQ(a, *it2++); } +} + +// Test gathering a range of objects. +template void test_gather_range(std::vector const &values, std::vector const &result) { + mpi::communicator world; + + // gather on different roots + for (int root = 0; root < world.size(); ++root) { + // gather spans into a view of a vector + std::vector vec(result.size() * 2, T{0}); + mpi::gather_range(std::span{values}, std::ranges::drop_view(vec, result.size()), world, root); + if (world.rank() == root) { + expect_range_eq(std::ranges::drop_view(vec, result.size()), result); + expect_range_eq(std::ranges::take_view(vec, result.size()), std::vector(result.size(), T{0})); + } else { + expect_range_eq(vec, std::vector(result.size() * 2, T{0})); + } + } + + // allgather vectors into an oversized vector + std::vector vec(result.size() * 2, T{0}); + mpi::gather_range(values, std::span{vec.begin(), result.size()}, world, 0, true); + expect_range_eq(std::ranges::take_view(vec, result.size()), result); + expect_range_eq(std::ranges::drop_view(vec, result.size()), std::vector(result.size(), T{0})); +} + +TEST(MPI, GatherIntegerRange) { + mpi::communicator world; + std::vector values, result; + for (int i = 0; i < world.size(); ++i) { + for (int j = 0; j < 2 * (i + 1); ++j) result.emplace_back(i); + }; + for (int i = 0; i < 2 * (world.rank() + 1); ++i) values.emplace_back(world.rank()); + test_gather_range(values, result); +} + +TEST(MPI, GatherComplexRange) { + mpi::communicator world; + std::vector> values, result; + for (int i = 0; i < world.size(); ++i) { + for (int j = 0; j < 2 * (i + 1); ++j) result.emplace_back(i, -i); + } + for (int i = 0; i < 2 * (world.rank() + 1); ++i) values.emplace_back(world.rank(), -world.rank()); + test_gather_range(values, result); +} + +TEST(MPI, GatherCustomMPITypeRange) { + mpi::communicator world; 
+ std::vector values, result; + for (int i = 0; i < world.size(); ++i) { + for (int j = 0; j < 2 * (i + 1); ++j) result.emplace_back(i); + } + for (int i = 0; i < 2 * (world.rank() + 1); ++i) values.emplace_back(world.rank()); + test_gather_range(values, result); +} + +MPI_TEST_MAIN; diff --git a/test/c++/mpi_gather_vector.cpp b/test/c++/mpi_gather_vector.cpp new file mode 100644 index 00000000..f5a8cadd --- /dev/null +++ b/test/c++/mpi_gather_vector.cpp @@ -0,0 +1,102 @@ +// Copyright (c) 2022-2024 Simons Foundation +// Copyright (c) 2022 Hugo U.R. Strand +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Authors: Thomas Hahn, Hugo U.R. Strand + +#include "./custom_types.hpp" + +#include +#include + +#include +#include +#include + +// Check if two ranges are equal. +void expect_range_eq(auto &&rg1, auto &&rg2) { + EXPECT_EQ(std::ranges::size(rg1), std::ranges::size(rg2)); + auto it2 = std::ranges::begin(rg2); + for (auto &&a : rg1) { EXPECT_EQ(a, *it2++); } +} + +// Test gathering vectors. 
+template void test_gather_vector(std::vector const &values, std::vector const &result) { + mpi::communicator world; + + // gather on different roots + for (int root = 0; root < world.size(); ++root) { + if constexpr (mpi::has_mpi_type) { + // gather vectors into a new vector + auto vec = mpi::gather(values, world, root); + if (world.rank() == root) + expect_range_eq(vec, result); + else + EXPECT_TRUE(vec.empty()); + + // gather vectors into an existing vector + vec.clear(); + mpi::gather_into(values, vec, world, root); + if (world.rank() == root) + expect_range_eq(vec, result); + else + EXPECT_TRUE(vec.empty()); + } + + // gather empty vectors + auto vec = mpi::gather(std::vector{}, world, root); + EXPECT_TRUE(vec.empty()); + } + + // allgather vectors into a new vector + auto vec = mpi::all_gather(values, world); + expect_range_eq(vec, result); + + // allgather vectors into an existing vector + vec.clear(); + mpi::all_gather_into(values, vec, world); + expect_range_eq(vec, result); +} + +TEST(MPI, GatherIntegerVector) { + mpi::communicator world; + std::vector values, result; + for (int i = 0; i < world.size(); ++i) { + for (int j = 0; j < 2 * (i + 1); ++j) result.emplace_back(i); + } + for (int i = 0; i < 2 * (world.rank() + 1); ++i) values.emplace_back(world.rank()); + test_gather_vector(values, result); +} + +TEST(MPI, GatherComplexVector) { + mpi::communicator world; + std::vector> values, result; + for (int i = 0; i < world.size(); ++i) { + for (int j = 0; j < 2 * (i + 1); ++j) result.emplace_back(i, -i); + } + for (int i = 0; i < 2 * (world.rank() + 1); ++i) values.emplace_back(world.rank(), -world.rank()); + test_gather_vector(values, result); +} + +TEST(MPI, GatherCustomMPITypeVector) { + mpi::communicator world; + std::vector values, result; + for (int i = 0; i < world.size(); ++i) { + for (int j = 0; j < 2 * (i + 1); ++j) result.emplace_back(i); + } + for (int i = 0; i < 2 * (world.rank() + 1); ++i) values.emplace_back(world.rank()); + 
test_gather_vector(values, result); +} + +MPI_TEST_MAIN; diff --git a/test/c++/mpi_pair.cpp b/test/c++/mpi_pair.cpp deleted file mode 100644 index 46b9616d..00000000 --- a/test/c++/mpi_pair.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) 2021-2024 Simons Foundation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0.txt -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Authors: Thomas Hahn, Nils Wentzell - -#include -#include - -#include -#include -#include - -TEST(MPI, PairBroadcast) { - // broadcast a pair consisting of a string and a complex number - std::pair> p; - - auto str = std::string{"Hello"}; - auto cplx = std::complex(1.0, 2.0); - - mpi::communicator world; - if (world.rank() == 0) p = {str, cplx}; - - mpi::broadcast(p); - auto [str_bc, cplx_bc] = p; - EXPECT_EQ(str, str_bc); - EXPECT_EQ(cplx, cplx_bc); -} - -TEST(MPI, PairReduce) { - // reduce a pair of integers - mpi::communicator world; - auto r = world.rank(); - auto p = std::pair{1, r}; - - auto [r1, r2] = mpi::all_reduce(p); - auto nr = world.size(); - EXPECT_EQ(r1, nr); - EXPECT_EQ(r2, nr * (nr - 1) / 2); -} - -MPI_TEST_MAIN; diff --git a/test/c++/mpi_ranges.cpp b/test/c++/mpi_ranges.cpp deleted file mode 100644 index ad0ad63e..00000000 --- a/test/c++/mpi_ranges.cpp +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright (c) 2022-2024 Simons Foundation -// Copyright (c) 2022 Hugo U.R. Strand -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0.txt -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Authors: Thomas Hahn, Hugo U.R. Strand - -#include "./non_mpi_t.hpp" - -#include -#include - -#include -#include -#include - -TEST(MPI, RangesBroadcastMPIType) { - // broadcast a range with an MPI type - mpi::communicator world; - std::array arr{}; - if (world.rank() == 0) { - for (int i = 0; i < 5; ++i) arr[i] = i; - } - mpi::broadcast_range(arr, world); - for (int i = 0; i < 5; ++i) EXPECT_EQ(arr[i], i); -} - -TEST(MPI, RangesBroadcastTypeWithSpezializedMPIBroadcast) { - // broadcast a range with a type that has a specialized mpi_broadcast - mpi::communicator world; - std::vector vec(5); - if (world.rank() == 0) { - for (int i = 0; i < 5; ++i) vec[i].a = i; - } - mpi::broadcast_range(vec, world); - for (int i = 0; i < 5; ++i) EXPECT_EQ(vec[i].a, i); -} - -TEST(MPI, RangesReduceInPlaceMPIType) { - // in-place reduce a range with an MPI type - mpi::communicator world; - std::array arr{0, 1, 2, 3, 4}; - mpi::reduce_in_place_range(arr, world); - if (world.rank() == 0) - for (int i = 0; i < 5; ++i) EXPECT_EQ(arr[i], i * world.size()); - else - for (int i = 0; i < 5; ++i) EXPECT_EQ(arr[i], i); - - // in-place allreduce a range with an MPI type - arr = {0, 1, 2, 3, 4}; - mpi::reduce_in_place_range(arr, world, 0, true); - for (int i = 0; i < 5; ++i) EXPECT_EQ(arr[i], i * world.size()); -} - -TEST(MPI, RangesReduceInPlaceTypeWithSpezializedMPIReduceInPlace) { - // in-place reduce a range with a type that has a specialized mpi_reduce_in_place - mpi::communicator world; - std::vector vec(5); - for (int i = 0; i < 5; ++i) vec[i].a = i; - 
mpi::reduce_in_place_range(vec, world); - if (world.rank() == 0) - for (int i = 0; i < 5; ++i) EXPECT_EQ(vec[i].a, i * world.size()); - else - for (int i = 0; i < 5; ++i) EXPECT_EQ(vec[i].a, i); - - // in-place allreduce a range with a type that has a specialized mpi_reduce_in_place - for (int i = 0; i < 5; ++i) vec[i].a = i; - mpi::reduce_in_place_range(vec, world, 0, true); - for (int i = 0; i < 5; ++i) EXPECT_EQ(vec[i].a, i * world.size()); -} - -TEST(MPI, RangesReduceMPIType) { - // reduce a range with an MPI type - mpi::communicator world; - std::array arr{0, 1, 2, 3, 4}, arr_red{}; - mpi::reduce_range(arr, arr_red, world); - if (world.rank() == 0) - for (int i = 0; i < 5; ++i) EXPECT_EQ(arr_red[i], i * world.size()); - else - for (int i = 0; i < 5; ++i) EXPECT_EQ(arr_red[i], 0); - - // allreduce a range with an MPI type - arr = {0, 1, 2, 3, 4}; - arr_red = {}; - mpi::reduce_range(arr, arr_red, world, 0, true); - for (int i = 0; i < 5; ++i) EXPECT_EQ(arr_red[i], i * world.size()); -} - -TEST(MPI, RangesReduceTypeWithSpezializedMPIReduceInPlace) { - // reduce a range with a type that has a specialized mpi_reduce_in_place - mpi::communicator world; - std::vector vec(5, non_mpi_t{}), vec_red(5, non_mpi_t{}); - for (int i = 0; i < 5; ++i) vec[i].a = i; - mpi::reduce_range(vec, vec_red, world); - if (world.rank() == 0) - for (int i = 0; i < 5; ++i) EXPECT_EQ(vec_red[i].a, i * world.size()); - else - for (int i = 0; i < 5; ++i) EXPECT_EQ(vec_red[i].a, non_mpi_t{}.a); - - // allreduce a range with a type that has a specialized mpi_reduce_in_place - for (int i = 0; i < 5; ++i) vec[i].a = i; - mpi::reduce_range(vec, vec_red, world, 0, true); - for (int i = 0; i < 5; ++i) EXPECT_EQ(vec_red[i].a, i * world.size()); -} - -TEST(MPI, RangesScatterMPIType) { - // scatter a range with an MPI type - mpi::communicator world; - auto const rank = world.rank(); - auto sizes = std::vector(world.size()); - for (int i = 0; i < world.size(); ++i) sizes[i] = 
static_cast(mpi::chunk_length(10, world.size(), i)); - auto acc_sizes = std::vector(world.size() + 1, 0); - std::partial_sum(sizes.begin(), sizes.end(), std::next(acc_sizes.begin())); - std::vector vec(10, 0), vec_scattered(sizes[rank], 0); - if (rank == 0) { - for (int i = 0; i < 10; ++i) vec[i] = i; - } - mpi::scatter_range(vec, vec_scattered, 10, world, 0); - for (int i = 0; i < sizes[rank]; ++i) EXPECT_EQ(vec_scattered[i], i + acc_sizes[rank]); -} - -TEST(MPI, RangesGatherMPIType) { - // gather a range with an MPI type - mpi::communicator world; - auto const rank = world.rank(); - auto const gathered_size = (world.size() + 1) * world.size() / 2; - std::vector vec(world.rank() + 1, 0), vec_gathered(gathered_size, 0); - std::iota(vec.begin(), vec.end(), rank * (rank + 1) / 2); - mpi::gather_range(vec, vec_gathered, gathered_size, world, 0, false); - if (rank == 0) { - for (int i = 0; i < gathered_size; ++i) EXPECT_EQ(vec_gathered[i], i); - } -} - -TEST(MPI, RangesGatherTypeWithSpecializedMPIBroadcast) { - // gather a range with a type that has a specialized mpi_broadcast - mpi::communicator world; - auto const rank = world.rank(); - auto const gathered_size = (world.size() + 1) * world.size() / 2; - std::vector vec(world.rank() + 1, non_mpi_t{}), vec_gathered(gathered_size, non_mpi_t{}); - for (int i = 0; i < vec.size(); ++i) vec[i].a = i + rank * (rank + 1) / 2; - - // providing the size of the output range - mpi::gather_range(vec, vec_gathered, gathered_size, world, 0, true); - for (int i = 0; i < gathered_size; ++i) EXPECT_EQ(vec_gathered[i].a, i); -} - -MPI_TEST_MAIN; diff --git a/test/c++/mpi_reduce.cpp b/test/c++/mpi_reduce.cpp new file mode 100644 index 00000000..67a06fc4 --- /dev/null +++ b/test/c++/mpi_reduce.cpp @@ -0,0 +1,121 @@ +// Copyright (c) 2022-2024 Simons Foundation +// Copyright (c) 2022 Hugo U.R. Strand +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Authors: Thomas Hahn, Hugo U.R. Strand + +#include "./custom_types.hpp" + +#include +#include + +#include +#include + +// Test reducing a single value/object. +template void test_reduce(T value, T result, T def_value, MPI_Op op = MPI_SUM) { + mpi::communicator world; + + // reduce from different roots + for (int root = 0; root < world.size(); ++root) { + // reduce an object into new object + auto red_value = mpi::reduce(value, world, root, false, op); + if (world.rank() == root) { EXPECT_EQ(red_value, result); } + + // reduce an object in place + red_value = value; + mpi::reduce_in_place(red_value, world, root, false, op); + if (world.rank() == root) + EXPECT_EQ(red_value, result); + else + EXPECT_EQ(red_value, value); + + // reduce an object into an existing object + red_value = def_value; + mpi::reduce_into(value, red_value, world, root, false, op); + if (world.rank() == root) + EXPECT_EQ(red_value, result); + else + EXPECT_EQ(red_value, def_value); + } + + // allreduce an object into a new object + auto red_value = mpi::all_reduce(value, world, op); + EXPECT_EQ(red_value, result); + + // allreduce an object in place + red_value = value; + mpi::all_reduce_in_place(red_value, world, op); + EXPECT_EQ(red_value, result); + + // allreduce an object using all_reduce_into + red_value = value; + mpi::all_reduce_into(value, red_value, world, op); + EXPECT_EQ(red_value, result); + + // allreduce an object in place using all_reduce_into + red_value = value; + mpi::all_reduce_into(red_value, red_value, world, op); + EXPECT_EQ(red_value, result); 
+} + +TEST(MPI, ReduceInteger) { + mpi::communicator world; + int rank = world.rank() + 1; + int red_rank = world.size() * (world.size() + 1) / 2; + test_reduce(rank, red_rank, 0); +} + +TEST(MPI, ReduceComplex) { + mpi::communicator world; + double rank = world.rank() + 1.0; + double red_rank = world.size() * (world.size() + 1) * 0.5; + test_reduce(std::complex{rank, -rank}, std::complex{red_rank, -red_rank}, std::complex{0, 0}); +} + +TEST(MPI, ReduceCustomMPIType) { + mpi::communicator world; + int rank = world.rank() + 1; + int red_rank = world.size() * (world.size() + 1) / 2; + if (world.size() > 1) test_reduce(mpi_t{rank}, mpi_t{red_rank}, mpi_t{0}, mpi::map_add()); +} + +TEST(MPI, ReduceCustomNonMPIType) { + mpi::communicator world; + int rank = world.rank() + 1; + int red_rank = world.size() * (world.size() + 1) / 2; + test_reduce(non_mpi_t{rank}, non_mpi_t{red_rank}, non_mpi_t{0}); +} + +// Test reducing a pair. +TEST(MPI, ReducePair) { + mpi::communicator world; + + // allreduce a pair of integers + auto p1 = mpi::all_reduce(std::pair{world.rank(), -world.rank()}, world, MPI_MAX); + EXPECT_EQ(p1.first, world.size() - 1); + EXPECT_EQ(p1.second, 0); + + // reduce a pair of non_mpi_t + auto p2 = mpi::reduce(std::pair{non_mpi_t{1}, non_mpi_t{world.rank() + 1}}, world, world.size() - 1); + if (world.rank() == world.size() - 1) { + EXPECT_EQ(p2.first, non_mpi_t(world.size())); + EXPECT_EQ(p2.second, non_mpi_t(world.size() * (world.size() + 1) / 2)); + } else { + EXPECT_EQ(p2.first, non_mpi_t()); + EXPECT_EQ(p2.second, non_mpi_t()); + } +} + +MPI_TEST_MAIN; diff --git a/test/c++/mpi_reduce_array.cpp b/test/c++/mpi_reduce_array.cpp new file mode 100644 index 00000000..96720a2a --- /dev/null +++ b/test/c++/mpi_reduce_array.cpp @@ -0,0 +1,127 @@ +// Copyright (c) 2022-2024 Simons Foundation +// Copyright (c) 2022 Hugo U.R. 
Strand +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Authors: Thomas Hahn, Hugo U.R. Strand + +#include "./custom_types.hpp" + +#include +#include + +#include +#include +#include + +// Check if two ranges are equal. +void expect_range_eq(auto &&rg1, auto &&rg2) { + EXPECT_EQ(std::ranges::size(rg1), std::ranges::size(rg2)); + auto it2 = std::ranges::begin(rg2); + for (auto &&a : rg1) { EXPECT_EQ(a, *it2++); } +} + +// Test reducing arrays. +template void test_reduce_array(std::array const &values, std::array const &result, MPI_Op op = MPI_SUM) { + mpi::communicator world; + + // reduce from different roots + for (int root = 0; root < world.size(); ++root) { + // reduce an array into a new array + auto arr = mpi::reduce(values, world, root, false, op); + if (world.rank() == root) expect_range_eq(arr, result); + + // reduce an empty array + std::array empty_arr{}; + auto empty_red = mpi::reduce(empty_arr, world, root, false, op); + static_assert(empty_red.size() == 0); + + // reduce an array in place + arr = values; + mpi::reduce_in_place(arr, world, root, false, op); + if (world.rank() == root) + expect_range_eq(arr, result); + else + expect_range_eq(arr, values); + + // reduce an array into an existing array + arr = {}; + mpi::reduce_into(values, arr, world, root, false, op); + if (world.rank() == root) expect_range_eq(arr, result); + + // reduce an empty array into an existing array + mpi::reduce_into(empty_arr, empty_arr, world, root, false, 
op); + } + + // allreduce an array into new array + auto arr = mpi::all_reduce(values, world, op); + expect_range_eq(arr, result); + + // allreduce an array in place + arr = values; + mpi::all_reduce_in_place(arr, world, op); + expect_range_eq(arr, result); + + // allreduce an array in place using all_reduce_into + arr = values; + mpi::all_reduce_into(arr, arr, world, op); + expect_range_eq(arr, result); +} + +TEST(MPI, ReduceIntegerArray) { + mpi::communicator world; + std::array values{}, result{}; + for (int i = 0; i < 5; ++i) { + values[i] = (i + 1) * (world.rank() + 1); + result[i] = (i + 1) * world.size() * (world.size() + 1) / 2; + } + test_reduce_array(values, result); +} + +TEST(MPI, ReduceComplexArray) { + mpi::communicator world; + double rank = world.rank() + 1.0; + double red_rank = world.size() * (world.size() + 1) * 0.5; + std::array, 5> values{}, result{}; + for (int i = 0; i < 5; ++i) { + values[i] = std::complex{rank * (i + 1), -rank * (i + 1)}; + result[i] = std::complex{red_rank * (i + 1), -red_rank * (i + 1)}; + } + test_reduce_array(values, result); +} + +TEST(MPI, ReduceCustomMPITypeArray) { + mpi::communicator world; + long rank = world.rank() + 1; + long red_rank = world.size() * (world.size() + 1) / 2; + std::array values{}, result{}; + for (int i = 0; i < 5; ++i) { + values[i] = mpi_t{rank * (i + 1)}; + result[i] = mpi_t{red_rank * (i + 1)}; + } + if (world.size() > 1) { test_reduce_array(values, result, mpi::map_add()); } +} + +TEST(MPI, ReduceCustomNonMPITypeArray) { + mpi::communicator world; + int rank = world.rank() + 1; + int red_rank = world.size() * (world.size() + 1) / 2; + std::array values{}, result{}; + for (int i = 0; i < 5; ++i) { + values[i] = non_mpi_t{rank * (i + 1)}; + result[i] = non_mpi_t{red_rank * (i + 1)}; + } + test_reduce_array(values, result); +} + +MPI_TEST_MAIN; diff --git a/test/c++/mpi_reduce_range.cpp b/test/c++/mpi_reduce_range.cpp new file mode 100644 index 00000000..5f8cd193 --- /dev/null +++ 
b/test/c++/mpi_reduce_range.cpp @@ -0,0 +1,154 @@ +// Copyright (c) 2022-2024 Simons Foundation +// Copyright (c) 2022 Hugo U.R. Strand +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Authors: Thomas Hahn, Hugo U.R. Strand + +#include "./custom_types.hpp" + +#include +#include + +#include +#include +#include +#include +#include + +// Check if two ranges are equal. +void expect_range_eq(auto &&rg1, auto &&rg2) { + EXPECT_EQ(std::ranges::size(rg1), std::ranges::size(rg2)); + auto it2 = std::ranges::begin(rg2); + for (auto &&a : rg1) { EXPECT_EQ(a, *it2++); } +} + +// Test reducing a range of objects. 
+template void test_reduce_range(std::array const &values, std::array const &result, MPI_Op op = MPI_SUM) { + mpi::communicator world; + + // reduce from different roots + for (int root = 0; root < world.size(); ++root) { + // reduce a span into an array + auto arr = values; + mpi::reduce_range(std::span{values.data() + 2, 3}, std::span{arr.begin(), 3}, world, root, false, op); + if (world.rank() == root) { + expect_range_eq(std::span{arr.data(), 3}, std::span{result.data() + 2, 3}); + expect_range_eq(std::span{arr.data() + 3, 2}, std::span{values.data() + 3, 2}); + } else { + expect_range_eq(arr, values); + } + + // reduce a list into a list + std::list list(values.begin(), values.end()), list_red(values.begin(), values.end()); + if (world.rank() == root) { + mpi::reduce_range(list, list_red, world, root, false, op); + expect_range_eq(list_red, result); + } else { + list_red.clear(); + mpi::reduce_range(list, list_red, world, root, false, op); + EXPECT_TRUE(list_red.empty()); + } + + // reduce a view on a list in place + list.assign(values.begin(), values.end()); + mpi::reduce_range(std::ranges::take_view(list, 2), std::ranges::take_view(list, 2), world, root, false, op); + if (world.rank() == root) { + expect_range_eq(std::ranges::take_view(list, 2), std::ranges::take_view(result, 2)); + expect_range_eq(std::ranges::drop_view(list, 2), std::ranges::drop_view(values, 2)); + } else { + expect_range_eq(list, values); + } + + // reduce a span in place + arr = values; + mpi::reduce_range(std::span{arr.data() + 2, 3}, std::span{arr.data() + 2, 3}, world, root, false, op); + if (world.rank() == root) { + expect_range_eq(std::span{arr.data() + 2, 3}, std::span{result.data() + 2, 3}); + expect_range_eq(std::span{arr.data(), 2}, std::span{values.data(), 2}); + } else { + expect_range_eq(arr, values); + } + + // reduce an array into a list + if (world.rank() == root) { + list = std::list(5); + mpi::reduce_range(values, list, world, root, false, op); + expect_range_eq(list, 
result); + } else { + list.clear(); + mpi::reduce_range(values, list, world, root, false, op); + EXPECT_TRUE(list.empty()); + } + } + + // allreduce a list in place using reduce_range + std::list list(values.begin(), values.end()); + mpi::reduce_range(list, list, world, 0, true, op); + expect_range_eq(list, result); + + // allreduce a span in place + auto arr = values; + mpi::reduce_range(std::span{arr.data() + 1, 3}, std::span{arr.data() + 1, 3}, world, 0, true, op); + expect_range_eq(std::span{arr.data() + 1, 3}, std::span{result.data() + 1, 3}); + EXPECT_EQ(arr[0], values[0]); + EXPECT_EQ(arr[4], values[4]); +} + +TEST(MPI, ReduceIntegerRange) { + mpi::communicator world; + std::array values{}, result{}; + for (int i = 0; i < 5; ++i) { + values[i] = (i + 1) * (world.rank() + 1); + result[i] = (i + 1) * world.size() * (world.size() + 1) / 2; + } + test_reduce_range(values, result); +} + +TEST(MPI, ReduceComplexRange) { + mpi::communicator world; + double rank = world.rank() + 1.0; + double red_rank = world.size() * (world.size() + 1) * 0.5; + std::array, 5> values{}, result{}; + for (int i = 0; i < 5; ++i) { + values[i] = std::complex{rank * (i + 1), -rank * (i + 1)}; + result[i] = std::complex{red_rank * (i + 1), -red_rank * (i + 1)}; + } + test_reduce_range(values, result); +} + +TEST(MPI, ReduceCustomMPITypeRange) { + mpi::communicator world; + long rank = world.rank() + 1; + long red_rank = world.size() * (world.size() + 1) / 2; + std::array values{}, result{}; + for (int i = 0; i < 5; ++i) { + values[i] = mpi_t{rank * (i + 1)}; + result[i] = mpi_t{red_rank * (i + 1)}; + } + if (world.size() > 1) { test_reduce_range(values, result, mpi::map_add()); } +} + +TEST(MPI, ReduceCustomNonMPITypeRange) { + mpi::communicator world; + int rank = world.rank() + 1; + int red_rank = world.size() * (world.size() + 1) / 2; + std::array values{}, result{}; + for (int i = 0; i < 5; ++i) { + values[i] = non_mpi_t{rank * (i + 1)}; + result[i] = non_mpi_t{red_rank * (i + 1)}; + 
} + test_reduce_range(values, result); +} + +MPI_TEST_MAIN; diff --git a/test/c++/mpi_reduce_vector.cpp b/test/c++/mpi_reduce_vector.cpp new file mode 100644 index 00000000..29142bcb --- /dev/null +++ b/test/c++/mpi_reduce_vector.cpp @@ -0,0 +1,138 @@ +// Copyright (c) 2022-2024 Simons Foundation +// Copyright (c) 2022 Hugo U.R. Strand +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Authors: Thomas Hahn, Hugo U.R. Strand + +#include "./custom_types.hpp" + +#include +#include + +#include +#include +#include + +// Check if two ranges are equal. +void expect_range_eq(auto &&rg1, auto &&rg2) { + EXPECT_EQ(std::ranges::size(rg1), std::ranges::size(rg2)); + auto it2 = std::ranges::begin(rg2); + for (auto &&a : rg1) { EXPECT_EQ(a, *it2++); } +} + +// Test reducing a vector. 
+template void test_reduce_vector(std::vector const &values, std::vector const &result, MPI_Op op = MPI_SUM) { + mpi::communicator world; + + // reduce from different roots + for (int root = 0; root < world.size(); ++root) { + // reduce a vector into a new vector + auto vec = mpi::reduce(values, world, root, false, op); + if (world.rank() == root) expect_range_eq(vec, result); + + // reduce an empty vector + auto empty_vec = mpi::reduce(std::vector{}, world, root, false, op); + EXPECT_EQ(empty_vec.size(), 0); + + // reduce a vector in place + vec = values; + mpi::reduce_in_place(vec, world, root, false, op); + if (world.rank() == root) + expect_range_eq(vec, result); + else + expect_range_eq(vec, values); + + // reduce an empty vector in place + mpi::reduce_in_place(empty_vec, world, root, false, op); + EXPECT_EQ(empty_vec.size(), 0); + + // reduce a vector into an existing empty vector + vec.clear(); + mpi::reduce_into(values, vec, world, root, false, op); + if (world.rank() == root) + expect_range_eq(vec, result); + else + EXPECT_TRUE(vec.empty()); + + // reduce an empty vector into an existing vector + vec = values; + mpi::reduce_into(empty_vec, vec, world, root, false, op); + if (world.rank() == root) + EXPECT_EQ(vec.size(), 0); + else + expect_range_eq(vec, values); + } + + // allreduce a vector into a new vector + auto vec = mpi::all_reduce(values, world, op); + expect_range_eq(vec, result); + + // allreduce a vector in place + vec = values; + mpi::all_reduce_in_place(vec, world, op); + expect_range_eq(vec, result); + + // allreduce a vector in place using all_reduce_into + vec = values; + mpi::all_reduce_into(vec, vec, world, op); + expect_range_eq(vec, result); +} + +TEST(MPI, ReduceIntegerVector) { + mpi::communicator world; + std::vector values(5), result(5); + for (int i = 0; i < 5; ++i) { + values[i] = (i + 1) * (world.rank() + 1); + result[i] = (i + 1) * world.size() * (world.size() + 1) / 2; + } + test_reduce_vector(values, result); +} + +TEST(MPI, 
ReduceComplexVector) { + mpi::communicator world; + double rank = world.rank() + 1.0; + double red_rank = world.size() * (world.size() + 1) * 0.5; + std::vector> values(5), result(5); + for (int i = 0; i < 5; ++i) { + values[i] = std::complex{rank * (i + 1), -rank * (i + 1)}; + result[i] = std::complex{red_rank * (i + 1), -red_rank * (i + 1)}; + } + test_reduce_vector(values, result); +} + +TEST(MPI, ReduceCustomMPITypeVector) { + mpi::communicator world; + long rank = world.rank() + 1; + long red_rank = world.size() * (world.size() + 1) / 2; + std::vector values(5), result(5); + for (int i = 0; i < 5; ++i) { + values[i] = mpi_t{rank * (i + 1)}; + result[i] = mpi_t{red_rank * (i + 1)}; + } + if (world.size() > 1) { test_reduce_vector(values, result, mpi::map_add()); } +} + +TEST(MPI, ReduceCustomNonMPITypeVector) { + mpi::communicator world; + int rank = world.rank() + 1; + int red_rank = world.size() * (world.size() + 1) / 2; + std::vector values(5), result(5); + for (int i = 0; i < 5; ++i) { + values[i] = non_mpi_t{rank * (i + 1)}; + result[i] = non_mpi_t{red_rank * (i + 1)}; + } + test_reduce_vector(values, result); +} + +MPI_TEST_MAIN; diff --git a/test/c++/mpi_scatter_range.cpp b/test/c++/mpi_scatter_range.cpp new file mode 100644 index 00000000..a81e3a35 --- /dev/null +++ b/test/c++/mpi_scatter_range.cpp @@ -0,0 +1,114 @@ +// Copyright (c) 2022-2024 Simons Foundation +// Copyright (c) 2022 Hugo U.R. Strand +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// Authors: Thomas Hahn, Hugo U.R. Strand + +#include "./custom_types.hpp" + +#include +#include + +#include +#include +#include +#include +#include + +// Check if two ranges are equal. +void expect_range_eq(auto &&rg1, auto &&rg2) { + EXPECT_EQ(std::ranges::size(rg1), std::ranges::size(rg2)); + auto it2 = std::ranges::begin(rg2); + for (auto &&a : rg1) { EXPECT_EQ(a, *it2++); } +} + +// Test scattering a vector. +template void test_scatter_range(std::vector const &values, long chunk_size) { + mpi::communicator world; + const int rank = world.rank(); + auto sizes = std::vector(world.size()); + for (int i = 0; i < world.size(); ++i) sizes[i] = static_cast(mpi::chunk_length(values.size(), world.size(), i, chunk_size)); + auto acc_sizes = std::vector(world.size() + 1, 0); + std::partial_sum(sizes.begin(), sizes.end(), std::next(acc_sizes.begin())); + EXPECT_EQ(acc_sizes.back(), values.size()); + + // scatter from different roots + for (int root = 0; root < world.size(); ++root) { + // scatter a vector into a span + auto vec = std::vector(sizes[rank], T{0}); + mpi::scatter_range(values, std::span(vec.begin(), sizes[rank]), values.size(), world, root, chunk_size); + expect_range_eq(vec, std::span(values.begin() + acc_sizes[rank], sizes[rank])); + + // scatter with chunk size = number of elements to be scattered + vec = std::vector((rank == 0 ? 
values.size() : 0), T{0}); + mpi::scatter_range(values, vec, values.size(), world, root, values.size()); + if (world.rank() == 0) + expect_range_eq(vec, values); + else + EXPECT_TRUE(vec.empty()); + } +} + +TEST(MPI, ScatterIntegerRange) { + mpi::communicator world; + const long min_nchunks = 3; + const long chunk_size = 4; + for (int i = 0; i < world.size(); ++i) { + // chunk size = 1 + std::vector values(min_nchunks * world.size() + i); + std::iota(values.begin(), values.end(), 0); + test_scatter_range(values, 1); + + // chunk size = 4 + values.resize((min_nchunks * world.size() + i) * chunk_size); + std::iota(values.begin(), values.end(), 0); + test_scatter_range(values, chunk_size); + } +} + +TEST(MPI, ScatterComplexRange) { + mpi::communicator world; + const long min_nchunks = 3; + const long chunk_size = 4; + for (int i = 0; i < world.size(); ++i) { + // chunk size = 1 + std::vector> values(min_nchunks * world.size() + i); + for (int j = 0; j < values.size(); ++j) values[j] = std::complex(j, -j); + test_scatter_range(values, 1); + + // chunk size = 4 + values.resize((min_nchunks * world.size() + i) * chunk_size); + for (int j = 0; j < values.size(); ++j) values[j] = std::complex(j, -j); + test_scatter_range(values, chunk_size); + } +} + +TEST(MPI, ScatterCustomMPITypeRange) { + mpi::communicator world; + const long min_nchunks = 3; + const long chunk_size = 4; + for (int i = 0; i < world.size(); ++i) { + // chunk size = 1 + std::vector values(min_nchunks * world.size() + i); + for (int j = 0; j < values.size(); ++j) values[j].a = j; + test_scatter_range(values, 1); + + // chunk size = 4 + values.resize((min_nchunks * world.size() + i) * chunk_size); + for (int j = 0; j < values.size(); ++j) values[j].a = j; + test_scatter_range(values, chunk_size); + } +} + +MPI_TEST_MAIN; diff --git a/test/c++/mpi_scatter_vector.cpp b/test/c++/mpi_scatter_vector.cpp new file mode 100644 index 00000000..e019080c --- /dev/null +++ b/test/c++/mpi_scatter_vector.cpp @@ -0,0 
+1,90 @@ +// Copyright (c) 2022-2024 Simons Foundation +// Copyright (c) 2022 Hugo U.R. Strand +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0.txt +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Authors: Thomas Hahn, Hugo U.R. Strand + +#include "./custom_types.hpp" + +#include +#include + +#include +#include +#include +#include +#include + +// Check if two ranges are equal. +void expect_range_eq(auto &&rg1, auto &&rg2) { + EXPECT_EQ(std::ranges::size(rg1), std::ranges::size(rg2)); + auto it2 = std::ranges::begin(rg2); + for (auto &&a : rg1) { EXPECT_EQ(a, *it2++); } +} + +// Test scattering a vector. +template void test_scatter_vector(std::vector const &values) { + mpi::communicator world; + auto recvcounts = std::vector(world.size()); + for (int i = 0; i < world.size(); ++i) recvcounts[i] = static_cast(mpi::chunk_length(values.size(), world.size(), i)); + auto displs = std::vector(world.size() + 1, 0); + std::partial_sum(recvcounts.begin(), recvcounts.end(), std::next(displs.begin())); + auto const recvcount = recvcounts[world.rank()]; + auto const displ = displs[world.rank()]; + + // scatter from different roots + for (int root = 0; root < world.size(); ++root) { + // scatter a vector into a new vector + auto vec = mpi::scatter(world.rank() == root ? 
values : std::vector{}, world, root); + expect_range_eq(vec, std::span(values.begin() + displ, recvcount)); + + // scatter a vector into an existing vector + vec.clear(); + mpi::scatter_into(values, vec, world, root); + expect_range_eq(vec, std::span(values.begin() + displ, recvcount)); + } + + // scatter an empty vector + auto vec = mpi::scatter(std::vector{}, world); + EXPECT_TRUE(vec.empty()); +} + +TEST(MPI, ScatterIntegerVector) { + mpi::communicator world; + for (int total_size = 3 * world.size(); total_size < 4 * world.size(); ++total_size) { + std::vector values(total_size); + std::iota(values.begin(), values.end(), 0); + test_scatter_vector(values); + } +} + +TEST(MPI, ScatterComplexVector) { + mpi::communicator world; + for (int total_size = 3 * world.size(); total_size < 4 * world.size(); ++total_size) { + std::vector> values(total_size); + for (int i = 0; i < total_size; ++i) values[i] = std::complex(i, -i); + test_scatter_vector(values); + } +} + +TEST(MPI, ScatterCustomMPITypeVector) { + mpi::communicator world; + for (int total_size = 3 * world.size(); total_size < 4 * world.size(); ++total_size) { + std::vector values(total_size); + for (int i = 0; i < total_size; ++i) values[i].a = i; + test_scatter_vector(values); + } +} + +MPI_TEST_MAIN; diff --git a/test/c++/mpi_string.cpp b/test/c++/mpi_string.cpp deleted file mode 100644 index 9c69dda1..00000000 --- a/test/c++/mpi_string.cpp +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2020-2024 Simons Foundation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0.txt -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -// Authors: Thomas Hahn, Nils Wentzell - -#include -#include - -#include - -TEST(MPI, StringBroadcast) { - // broadcast a string - mpi::communicator world; - - std::string s; - if (world.rank() == 0) s = "Hello World"; - - mpi::broadcast(s); - - EXPECT_EQ(s, std::string{"Hello World"}); -} - -TEST(MPI, StringGather) { - // gather a string - mpi::communicator world; - std::string s{}, exp_s{}; - for (int i = 0; i < world.size(); ++i) { - for (int j = 0; j < i + 1; ++j) exp_s += "a"; - exp_s += std::to_string(i); - } - for (int i = 0; i < world.rank() + 1; ++i) s += "a"; - s += std::to_string(world.rank()); - - // gather only on root - auto s_gathered = mpi::gather(s); - if (world.rank() == 0) EXPECT_EQ(s_gathered, exp_s); - else EXPECT_TRUE(s_gathered.empty()); - - // gather on all processes - auto s_gathered_all = mpi::all_gather(s); - EXPECT_EQ(s_gathered_all, exp_s); -} - -MPI_TEST_MAIN; diff --git a/test/c++/mpi_vector.cpp b/test/c++/mpi_vector.cpp deleted file mode 100644 index 3630c68e..00000000 --- a/test/c++/mpi_vector.cpp +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright (c) 2020-2024 Simons Foundation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0.txt -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -// Authors: Thomas Hahn, Nils Wentzell - -#include "./non_mpi_t.hpp" - -#include -#include -#include - -#include -#include -#include -#include -#include - -TEST(MPI, VectorBroadcastMPIType) { - // broadcast a vector with an MPI type - mpi::communicator world; - std::vector vec(5, 0); - if (world.rank() == 0) { - std::iota(vec.begin(), vec.end(), 0); - } else { - vec.clear(); - } - mpi::broadcast(vec, world); - for (int i = 0; i < 5; ++i) EXPECT_EQ(vec[i], i); -} - -TEST(MPI, VectorBroadcastTypeWithSpezializedMPIBroadcast) { - // broadcast a vector with a type that has a specialized mpi_broadcast - mpi::communicator world; - std::vector vec(5); - if (world.rank() == 0) { - for (int i = 0; i < 5; ++i) vec[i].a = i; - } - mpi::broadcast(vec, world); - for (int i = 0; i < 5; ++i) EXPECT_EQ(vec[i].a, i); -} - -TEST(MPI, VectorReduceInPlaceMPIType) { - // in-place reduce a vector with an MPI type - mpi::communicator world; - std::vector vec{0, 1, 2, 3, 4}; - mpi::reduce_in_place(vec, world); - if (world.rank() == 0) - for (int i = 0; i < 5; ++i) EXPECT_EQ(vec[i], i * world.size()); - else - for (int i = 0; i < 5; ++i) EXPECT_EQ(vec[i], i); - - // in-place allreduce a vector with an MPI type - std::iota(vec.begin(), vec.end(), 0); - mpi::all_reduce_in_place(vec, world); - for (int i = 0; i < 5; ++i) EXPECT_EQ(vec[i], i * world.size()); -} - -TEST(MPI, VectorReduceInPlaceTypeWithSpezializedMPIReduceInPlace) { - // in-place reduce a vector with a type that has a specialized mpi_reduce_in_place - mpi::communicator world; - std::vector vec(5); - for (int i = 0; i < 5; ++i) vec[i].a = i; - mpi::reduce_in_place(vec, world); - if (world.rank() == 0) - for (int i = 0; i < 5; ++i) EXPECT_EQ(vec[i].a, i * world.size()); - else - for (int i = 0; i < 5; ++i) EXPECT_EQ(vec[i].a, i); - - // in-place allreduce a vector with a type that has a specialized mpi_reduce_in_place - for (int i = 0; i < 5; ++i) vec[i].a = i; - mpi::all_reduce_in_place(vec, world); - for (int i = 0; i < 5; 
++i) EXPECT_EQ(vec[i].a, i * world.size()); -} - -TEST(MPI, VectorReduceMPIType) { - // reduce a vector with complex numbers - mpi::communicator world; - using vec_type = std::vector>; - const int size = 7; - vec_type vec(size); - for (int i = 0; i < size; ++i) vec[i] = std::complex(i, -i); - auto vec_reduced = mpi::reduce(vec, world); - if (world.rank() == 0) - for (int i = 0; i < size; ++i) EXPECT_EQ(vec_reduced[i], std::complex(i * world.size(), -i * world.size())); - else - EXPECT_TRUE(vec_reduced.empty()); - - // allreduce a vector with complex numbers - vec_reduced = mpi::all_reduce(vec, world); - for (int i = 0; i < size; ++i) EXPECT_EQ(vec_reduced[i], std::complex(i * world.size(), -i * world.size())); -} - -TEST(MPI, VectorReduceTypeWithSpezializedMPIReduce) { - // reduce a vector with a type that has a specialized mpi_reduce - mpi::communicator world; - std::vector vec(5); - for (int i = 0; i < 5; ++i) vec[i].a = i; - auto vec_reduced = mpi::reduce(vec, world); - if (world.rank() == 0) - for (int i = 0; i < 5; ++i) EXPECT_EQ(vec_reduced[i].a, i * world.size()); - else - EXPECT_TRUE(vec_reduced.empty()); - - // allreduce a vector with a type that has a specialized mpi_reduce - for (int i = 0; i < 5; ++i) vec[i].a = i; - auto vec_reduced_all = mpi::all_reduce(vec, world); - for (int i = 0; i < 5; ++i) EXPECT_EQ(vec_reduced_all[i].a, i * world.size()); -} - -TEST(MPI, EmptyVectorReduce) { - // reduce an empty vector - mpi::communicator world; - std::vector v1{}; - std::vector v2 = mpi::reduce(v1, world); -} - -TEST(MPI, VectorGatherScatter) { - // scatter and gather a vector of complex numbers - mpi::communicator world; - std::vector> vec(7), scattered_vec(7), gathered_vec(7, {0.0, 0.0}); - for (auto [i, v_i] : itertools::enumerate(vec)) v_i = static_cast(i) + 1.0; - - scattered_vec = mpi::scatter(vec, world); - auto tmp = mpi::scatter(vec, world); - - for (auto &x : scattered_vec) x *= -1; - for (auto &x : vec) x *= -1; - - gathered_vec = 
mpi::all_gather(scattered_vec, world); - - EXPECT_EQ(vec, gathered_vec); -} - -TEST(MPI, VectorGatherPair) { - // gather a vector of pairs - mpi::communicator world; - auto const rank = world.rank(); - auto const gathered_size = (world.size() + 1) * world.size() / 2; - std::vector> vec(world.rank() + 1); - for (int i = 0; i < vec.size(); ++i) { - vec[i].first = i + rank * (rank + 1) / 2; - vec[i].second = std::to_string(vec[i].first); - } - auto vec_gathered = mpi::all_gather(vec, world); - for (int i = 0; i < gathered_size; ++i) EXPECT_EQ(vec_gathered[i], std::make_pair(i, std::to_string(i))); -} - -TEST(MPI, VectorGatherOnlyOnRoot) { - // gather a vector only on root - mpi::communicator world; - std::vector v = {1, 2, 3}; - auto res = mpi::gather(v, world); - if (world.rank() == 0) { - auto exp_res = v; - for (int i = 1; i < world.size(); ++i) exp_res.insert(exp_res.end(), v.begin(), v.end()); - EXPECT_EQ(res, exp_res); - } else { - EXPECT_TRUE(res.empty()); - } -} - -TEST(MPI, VectorScatterSizeZero) { - // pass a vector of size 0 to scatter - mpi::communicator world; - std::vector v = {1, 2, 3}; - if (world.rank() == 0) v.clear(); - auto res = mpi::scatter(v, world); - EXPECT_TRUE(res.empty()); -} - -MPI_TEST_MAIN; diff --git a/test/c++/non_mpi_t.hpp b/test/c++/non_mpi_t.hpp deleted file mode 100644 index 79821cfe..00000000 --- a/test/c++/non_mpi_t.hpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2022-2024 Simons Foundation -// Copyright (c) 2022 Hugo U.R. Strand -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0.txt -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -// Authors: Thomas Hahn, Hugo U.R. Strand - -#pragma once - -#include -#include - -struct non_mpi_t { - int a{1}; -}; - -// needs to be in the mpi namespace for ADL to work -namespace mpi { - - // specialize mpi_broadcast for foo - void mpi_broadcast(non_mpi_t &f, mpi::communicator c = {}, int root = 0) { broadcast(f.a, c, root); } - - // specialize mpi_reduce_in_place for foo - void mpi_reduce_in_place(non_mpi_t &f, mpi::communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) { - if (all) { - all_reduce_in_place(f.a, c, op); - } else { - reduce_in_place(f.a, c, root, false, op); - } - } - - // specialize mpi_reduce for foo - non_mpi_t mpi_reduce(non_mpi_t const &f, mpi::communicator c = {}, int root = 0, bool all = false, MPI_Op op = MPI_SUM) { - non_mpi_t res{}; - if (all) { - res.a = all_reduce(f.a, c, op); - } else { - res.a = reduce(f.a, c, root, false, op); - } - return (c.rank() == root || all ? res : non_mpi_t{}); - } - -} // namespace mpi