diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml index 3249c6eacd5..0e19117ded1 100644 --- a/.github/workflows/gcc.yml +++ b/.github/workflows/gcc.yml @@ -376,7 +376,7 @@ jobs: # /home/runner/work/amrex/amrex/Src/Base/AMReX_IntVect.H:194:92: error: array subscript -1 is below array bounds of ‘int [3]’ [-Werror=array-bounds] # int& operator[] (int i) noexcept { BL_ASSERT(i>=0 && i < AMREX_SPACEDIM); return vect[i]; } # - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches -Wmissing-include-dirs -Wno-array-bounds"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches -Wmissing-include-dirs -Wno-array-bounds"} run: | export CCACHE_COMPRESS=1 export CCACHE_COMPRESSLEVEL=10 diff --git a/Src/Extern/HDF5/AMReX_ParticleHDF5.H b/Src/Extern/HDF5/AMReX_ParticleHDF5.H index 29747ea9d76..b51e3638fb5 100644 --- a/Src/Extern/HDF5/AMReX_ParticleHDF5.H +++ b/Src/Extern/HDF5/AMReX_ParticleHDF5.H @@ -66,9 +66,9 @@ ParticleContainer_impl int + [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, int i) -> int { - return p.id() > 0; + return ptd.id(i) > 0; }, true); } @@ -102,9 +102,9 @@ ParticleContainer_impl int + [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, int i) -> int { - return p.id() > 0; + return ptd.id(i) > 0; }); } @@ -138,9 +138,9 @@ ParticleContainer_impl 0; + return ptd.id(i) > 0; }); } @@ -166,9 +166,9 @@ ParticleContainer_impl 0; + return ptd.id(i) > 0; }); } @@ -200,9 +200,9 @@ ParticleContainer_impl 0; + return ptd.id(i) > 0; }); } @@ -238,9 +238,9 @@ ParticleContainer_impl 0; + return ptd.id(i) > 0; }); } @@ -261,9 +261,9 @@ WritePlotFileHDF5 (const std::string& dir, const std::string& name, write_real_comp, write_int_comp, real_comp_names, int_comp_names, compression, - [=] AMREX_GPU_HOST_DEVICE (const SuperParticleType& p) + [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, int i) { - return p.id() > 0; + return ptd.id(i) > 0; }); } diff --git a/Src/Particle/AMReX_Particle.H b/Src/Particle/AMReX_Particle.H index 51303ae4161..098fdfb7b7a 100644 --- a/Src/Particle/AMReX_Particle.H +++ b/Src/Particle/AMReX_Particle.H @@ -337,6 +337,7 @@ struct alignas(sizeof(double)) Particle //! The floating point type used for the particles. using RealType = ParticleReal; + using IntType = int; static Long the_next_id; @@ -366,6 +367,12 @@ struct alignas(sizeof(double)) Particle #endif } + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + uint64_t& idcpu () & { return this->m_idcpu; } + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + const uint64_t& idcpu () const & { return this->m_idcpu; } + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE RealVect pos () const & {return RealVect(AMREX_D_DECL(this->m_pos[0], this->m_pos[1], this->m_pos[2]));} diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index 449b1c85409..3baafe1d34e 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -185,6 +185,8 @@ public: //! A single level worth of particles is indexed (grid id, tile id) //! for both SoA and AoS data. using ParticleLevel = std::map, ParticleTileType>; + using PTDType = typename ParticleTileType::ParticleTileDataType; + using ConstPTDType = typename ParticleTileType::ConstParticleTileDataType; using AoS = typename ParticleTileType::AoS; using SoA = typename ParticleTileType::SoA; diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index 7b200bedb75..1d90fde2c76 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -954,42 +954,47 @@ ParticleContainer_impl(partData(i,j,k,AMREX_SPACEDIM)); - //Set pos with the normalized weighted field - for (int n = 0; n < AMREX_SPACEDIM; ++n) - { - p.pos(n) = static_cast(partData(i,j,k,n) / p.rdata(0)); - } - //Set rdata(n>0) with the normalized weighted field for NStructReal - for (int n = 1; n < NStructReal; ++n) - { - p.rdata(n) = static_cast(partData(i,j,k,AMREX_SPACEDIM+n) / p.rdata(0)); - } - //Set rdata(n>0) with the normalized weighted field for NArrayReal - for (int n = 0; n < NArrayReal; ++n) - { - p.rdata(NStructReal+n) = static_cast(partData(i,j,k,AMREX_SPACEDIM+NStructReal+n)); - } - //Set idata with the "first" particles idata field for NStructInt - for (int n = 0; n < NStructInt; ++n) - { - p.idata(n) = imf_arr(i,j,k,1+n); - } - //Set idata with the "first" particles idata field for NArrayInt - for (int n = 0; n < NArrayInt; ++n) - { - p.idata(NStructInt+n) = imf_arr(i,j,k,1+NStructInt+n); - } - //Set the new particle in dst tile - dst.setSuperParticle(p, last_offset+offsets_ptr[((i-imf_arr.begin.x)+(j-imf_arr.begin.y)*imf_arr.jstride+(k-imf_arr.begin.z)*imf_arr.kstride)]); - } + if(imf_arr(i,j,k,0)!=0) + { + const auto idx = last_offset + offsets_ptr[ + ((i-imf_arr.begin.x) + +(j-imf_arr.begin.y)*imf_arr.jstride + +(k-imf_arr.begin.z)*imf_arr.kstride) + ]; + + dst.cpu(idx) = 0; + dst.id(idx) = LongParticleIds::VirtualParticleID; + + auto& p = dst[idx]; + //Set rdata(0) first so we can normalize the weighted fields + //Note that this does not work for soa PC + p.rdata(0) = static_cast(partData(i,j,k,AMREX_SPACEDIM));; + //Set pos with the normalized weighted field + for (int n = 0; n < AMREX_SPACEDIM; ++n) + { + p.pos(n) = static_cast(partData(i,j,k,n) / p.rdata(0)); + } + //Set rdata(n>0) with the normalized weighted field for NStructReal + for (int n = 1; n < NStructReal; ++n) + { + p.rdata(n) = static_cast(partData(i,j,k,AMREX_SPACEDIM+n) / p.rdata(0)); + } + //Set rdata(n>0) with the normalized weighted field for NArrayReal + for (int n = 0; n < NArrayReal; ++n) + { + dst.rdata(n)[idx] = static_cast(partData(i,j,k,AMREX_SPACEDIM+NStructReal+n)); + } + //Set idata with the "first" particles idata field for NStructInt + for (int n = 0; n < NStructInt; ++n) + { + p.idata(n) = imf_arr(i,j,k,1+n); + } + //Set idata with the "first" particles idata field for NArrayInt + for (int n = 0; n < NArrayInt; ++n) + { + dst.idata(n)[idx] = imf_arr(i,j,k,1+NStructInt+n); + } + } }); last_offset+=next_offset; diff --git a/Src/Particle/AMReX_ParticleReduce.H b/Src/Particle/AMReX_ParticleReduce.H index db2bb6ea3e2..50002e29327 100644 --- a/Src/Particle/AMReX_ParticleReduce.H +++ b/Src/Particle/AMReX_ParticleReduce.H @@ -66,18 +66,18 @@ auto call_f (F const& f, * return p.idata(0); * }); * - * using PTDType = typename PC::ParticleTileType::ConstParticleTileDataType; + * using ConstPTDType = typename PC::ConstPTDType; * auto sm = amrex::ReduceSum(pc, - * [=] AMREX_GPU_HOST_DEVICE (const PTDType& ptd, const int i) -> ParticleReal + * [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> ParticleReal * { - * return ptd.m_aos[i].rdata(0); + * return ptd.rdata(0)[i]; * }); * */ template ::value, int> foo = 0> auto ReduceSum (PC const& pc, F&& f) - -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int())) + -> decltype(particle_detail::call_f(f, typename PC::ConstPTDType(), int())) { return ReduceSum(pc, 0, pc.finestLevel(), std::forward(f)); } @@ -114,18 +114,18 @@ ReduceSum (PC const& pc, F&& f) * return p.idata(0); * }); * - * using PTDType = typename PC::ParticleTileType::ConstParticleTileDataType; + * using ConstPTDType = typename PC::ConstPTDType; * auto sm = amrex::ReduceSum(pc, - * [=] AMREX_GPU_HOST_DEVICE (const PTDType& ptd, const int i) -> ParticleReal + * [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> ParticleReal * { - * return ptd.m_aos[i].rdata(0); + * return ptd.rdata(0)[i]; * }); * */ template ::value, int> foo = 0> auto ReduceSum (PC const& pc, int lev, F&& f) - -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int())) + -> decltype(particle_detail::call_f(f, typename PC::ConstPTDType(), int())) { return ReduceSum(pc, lev, lev, std::forward(f)); } @@ -163,20 +163,20 @@ ReduceSum (PC const& pc, int lev, F&& f) * return p.idata(0); * }); * - * using PTDType = typename PC::ParticleTileType::ConstParticleTileDataType; + * using ConstPTDType = typename PC::ConstPTDType; * auto sm = amrex::ReduceSum(pc, - * [=] AMREX_GPU_HOST_DEVICE (const PTDType& ptd, const int i) -> ParticleReal + * [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> ParticleReal * { - * return ptd.m_aos[i].rdata(0); + * return ptd.rdata(0)[i]; * }); * */ template ::value, int> foo = 0> auto ReduceSum (PC const& pc, int lev_min, int lev_max, F const& f) - -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int())) + -> decltype(particle_detail::call_f(f, typename PC::ConstPTDType(), int())) { - using value_type = decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int())); + using value_type = decltype(particle_detail::call_f(f, typename PC::ConstPTDType(), int())); value_type sm = 0; #ifdef AMREX_USE_GPU @@ -266,18 +266,18 @@ ReduceSum (PC const& pc, int lev_min, int lev_max, F const& f) * return p.idata(0); * }); * - * using PTDType = typename PC::ParticleTileType::ConstParticleTileDataType; + * using ConstPTDType = typename PC::ConstPTDType; * auto mx = amrex::ReduceMax(pc, - * [=] AMREX_GPU_HOST_DEVICE (const PTDType& ptd, const int i) -> ParticleReal + * [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> ParticleReal * { - * return ptd.m_aos[i].rdata(0); + * return ptd.rdata(0)[i]; * }); * */ template ::value, int> foo = 0> auto ReduceMax (PC const& pc, F&& f) - -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int())) + -> decltype(particle_detail::call_f(f, typename PC::ConstPTDType(), int())) { return ReduceMax(pc, 0, pc.finestLevel(), std::forward(f)); } @@ -314,18 +314,18 @@ ReduceMax (PC const& pc, F&& f) * return p.idata(0); * }); * - * using PTDType = typename PC::ParticleTileType::ConstParticleTileDataType; + * using ConstPTDType = typename PC::ConstPTDType; * auto mx = amrex::ReduceMax(pc, - * [=] AMREX_GPU_HOST_DEVICE (const PTDType& ptd, const int i) -> ParticleReal + * [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> ParticleReal * { - * return ptd.m_aos[i].rdata(0); + * return ptd.rdata(0)[i]; * }); * */ template ::value, int> foo = 0> auto ReduceMax (PC const& pc, int lev, F&& f) - -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int())) + -> decltype(particle_detail::call_f(f, typename PC::ConstPTDType(), int())) { return ReduceMax(pc, lev, lev, std::forward(f)); } @@ -363,20 +363,20 @@ ReduceMax (PC const& pc, int lev, F&& f) * return p.idata(0); * }); * - * using PTDType = typename PC::ParticleTileType::ConstParticleTileDataType; + * using ConstPTDType = typename PC::ConstPTDType; * auto mx = amrex::ReduceMax(pc, - * [=] AMREX_GPU_HOST_DEVICE (const PTDType& ptd, const int i) -> ParticleReal + * [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> ParticleReal * { - * return ptd.m_aos[i].rdata(0); + * return ptd.rdata(0)[i]; * }); * */ template ::value, int> foo = 0> auto ReduceMax (PC const& pc, int lev_min, int lev_max, F const& f) - -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int())) + -> decltype(particle_detail::call_f(f, typename PC::ConstPTDType(), int())) { - using value_type = decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int())); + using value_type = decltype(particle_detail::call_f(f, typename PC::ConstPTDType(), int())); constexpr value_type value_lowest = std::numeric_limits::lowest(); value_type r = value_lowest; @@ -467,18 +467,18 @@ ReduceMax (PC const& pc, int lev_min, int lev_max, F const& f) * return p.idata(0); * }); * - * using PTDType = typename PC::ParticleTileType::ConstParticleTileDataType; + * using ConstPTDType = typename PC::ConstPTDType; * auto mn = amrex::ReduceMin(pc, - * [=] AMREX_GPU_HOST_DEVICE (const PTDType& ptd, const int i) -> ParticleReal + * [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> ParticleReal * { - * return ptd.m_aos[i].rdata(0); + * return ptd.rdata(0)[i]; * }); * */ template ::value, int> foo = 0> auto ReduceMin (PC const& pc, F&& f) - -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int())) + -> decltype(particle_detail::call_f(f, typename PC::ConstPTDType(), int())) { return ReduceMin(pc, 0, pc.finestLevel(), std::forward(f)); } @@ -515,18 +515,18 @@ ReduceMin (PC const& pc, F&& f) * return p.idata(0); * }); * - * using PTDType = typename PC::ParticleTileType::ConstParticleTileDataType; + * using ConstPTDType = typename PC::ConstPTDType; * auto mn = amrex::ReduceMin(pc, - * [=] AMREX_GPU_HOST_DEVICE (const PTDType& ptd, const int i) -> ParticleReal + * [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> ParticleReal * { - * return ptd.m_aos[i].rdata(0); + * return ptd.rdata(0)[i]; * }); * */ template ::value, int> foo = 0> auto ReduceMin (PC const& pc, int lev, F&& f) - -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int())) + -> decltype(particle_detail::call_f(f, typename PC::ConstPTDType(), int())) { return ReduceMin(pc, lev, lev, std::forward(f)); } @@ -564,20 +564,20 @@ ReduceMin (PC const& pc, int lev, F&& f) * return p.idata(0); * }); * - * using PTDType = typename PC::ParticleTileType::ConstParticleTileDataType; + * using ConstPTDType = typename PC::ConstPTDType; * auto mn = amrex::ReduceMin(pc, - * [=] AMREX_GPU_HOST_DEVICE (const PTDType& ptd, const int i) -> ParticleReal + * [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> ParticleReal * { - * return ptd.m_aos[i].rdata(0); + * return ptd.rdata(0)[i]; * }); * */ template ::value, int> foo = 0> auto ReduceMin (PC const& pc, int lev_min, int lev_max, F const& f) - -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int())) + -> decltype(particle_detail::call_f(f, typename PC::ConstPTDType(), int())) { - using value_type = decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int())); + using value_type = decltype(particle_detail::call_f(f, typename PC::ConstPTDType(), int())); constexpr value_type value_max = std::numeric_limits::max(); value_type r = value_max; @@ -668,11 +668,11 @@ ReduceMin (PC const& pc, int lev_min, int lev_max, F const& f) * return p.id() > 0; * }); * - * using PTDType = typename PC::ParticleTileType::ConstParticleTileDataType; + * using ConstPTDType = typename PC::ConstPTDType; * auto rv = amrex::ReduceLogicalAnd(pc, - * [=] AMREX_GPU_HOST_DEVICE (const PTDType& ptd, const int i) -> bool + * [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> bool * { - * return ptd.m_aos[i].id() > 0; + * return ptd.id(i) > 0; * }); * */ @@ -715,11 +715,11 @@ ReduceLogicalAnd (PC const& pc, F&& f) * return p.id() > 0; * }); * - * using PTDType = typename PC::ParticleTileType::ConstParticleTileDataType; + * using ConstPTDType = typename PC::ConstPTDType; * auto rv = amrex::ReduceLogicalAnd(pc, - * [=] AMREX_GPU_HOST_DEVICE (const PTDType& ptd, const int i) -> bool + * [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> bool * { - * return ptd.m_aos[i].id() > 0; + * return ptd.id(i) > 0; * }); * */ @@ -763,11 +763,11 @@ ReduceLogicalAnd (PC const& pc, int lev, F&& f) * return p.id() > 0; * }); * - * using PTDType = typename PC::ParticleTileType::ConstParticleTileDataType; + * using ConstPTDType = typename PC::ConstPTDType; * auto rv = amrex::ReduceLogicalAnd(pc, - * [=] AMREX_GPU_HOST_DEVICE (const PTDType& ptd, const int i) -> bool + * [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> bool * { - * return ptd.m_aos[i].id() > 0; + * return ptd.id(i) > 0; * }); * */ @@ -864,11 +864,11 @@ ReduceLogicalAnd (PC const& pc, int lev_min, int lev_max, F const& f) * return p.id() < 1; * }); * - * using PTDType = typename PC::ParticleTileType::ConstParticleTileDataType; + * using ConstPTDType = typename PC::ConstPTDType; * auto rv = amrex::ReduceLogicalOr(pc, - * [=] AMREX_GPU_HOST_DEVICE (const PTDType& ptd, const int i) -> bool + * [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> bool * { - * return ptd.m_aos[i].id() < 1; + * return ptd.id(i) < 1; * }); * */ @@ -911,11 +911,11 @@ ReduceLogicalOr (PC const& pc, F&& f) * return p.id() < 1; * }); * - * using PTDType = typename PC::ParticleTileType::ConstParticleTileDataType; + * using ConstPTDType = typename PC::ConstPTDType; * auto rv = amrex::ReduceLogicalOr(pc, - * [=] AMREX_GPU_HOST_DEVICE (const PTDType& ptd, const int i) -> bool + * [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> bool * { - * return ptd.m_aos[i].id() < 1; + * return ptd.id(i) < 1; * }); * */ @@ -959,11 +959,11 @@ ReduceLogicalOr (PC const& pc, int lev, F&& f) * return p.id() < 1; * }); * - * using PTDType = typename PC::ParticleTileType::ConstParticleTileDataType; + * using ConstPTDType = typename PC::ConstPTDType; * auto rv = amrex::ReduceLogicalOr(pc, - * [=] AMREX_GPU_HOST_DEVICE (const PTDType& ptd, const int i) -> bool + * [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> bool * { - * return ptd.m_aos[i].id() < 1; + * return ptd.id(i) < 1; * }); * */ @@ -1078,15 +1078,15 @@ ReduceLogicalOr (PC const& pc, int lev_min, int lev_max, F const& f) * return {a, b, c}; * }, reduce_ops); * - * using PTDType = typename PC::ParticleTileType::ConstParticleTileDataType; + * using ConstPTDType = typename PC::ConstPTDType; * amrex::ReduceOps reduce_ops; * auto r = amrex::ParticleReduce> ( - * pc, [=] AMREX_GPU_DEVICE (const PTDType& ptd, const int i) noexcept + * pc, [=] AMREX_GPU_DEVICE (const ConstPTDType& ptd, const int i) noexcept * -> amrex::GpuTuple * { - * const amrex::Real a = ptd.m_aos[i].rdata(1); - * const amrex::Real b = ptd.m_aos[i].rdata(2); - * const int c = ptd.m_aos[i].idata(1); + * const amrex::Real a = ptd.rdata(1)[i]; + * const amrex::Real b = ptd.rdata(2)[i]; + * const int c = ptd.idata(1)[i]; * return {a, b, c}; * }, reduce_ops); * @@ -1148,15 +1148,15 @@ ParticleReduce (PC const& pc, F&& f, ReduceOps& reduce_ops) * return {a, b, c}; * }, reduce_ops); * - * using PTDType = typename PC::ParticleTileType::ConstParticleTileDataType; + * using ConstPTDType = typename PC::ConstPTDType; * amrex::ReduceOps reduce_ops; * auto r = amrex::ParticleReduce> ( - * pc, [=] AMREX_GPU_DEVICE (const PTDType& ptd, const int i) noexcept + * pc, [=] AMREX_GPU_DEVICE (const ConstPTDType& ptd, const int i) noexcept * -> amrex::GpuTuple * { - * const amrex::Real a = ptd.m_aos[i].rdata(1); - * const amrex::Real b = ptd.m_aos[i].rdata(2); - * const int c = ptd.m_aos[i].idata(1); + * const amrex::Real a = ptd.rdata(1)[i]; + * const amrex::Real b = ptd.rdata(2)[i]; + * const int c = ptd.idata(1)[i]; * return {a, b, c}; * }, reduce_ops); * @@ -1219,15 +1219,15 @@ ParticleReduce (PC const& pc, int lev, F&& f, ReduceOps& reduce_ops) * return {a, b, c}; * }, reduce_ops); * - * using PTDType = typename PC::ParticleTileType::ConstParticleTileDataType; + * using ConstPTDType = typename PC::ConstPTDType; * amrex::ReduceOps reduce_ops; * auto r = amrex::ParticleReduce> ( - * pc, [=] AMREX_GPU_DEVICE (const PTDType& ptd, const int i) noexcept + * pc, [=] AMREX_GPU_DEVICE (const ConstPTDType& ptd, const int i) noexcept * -> amrex::GpuTuple * { - * const amrex::Real a = ptd.m_aos[i].rdata(1); - * const amrex::Real b = ptd.m_aos[i].rdata(2); - * const int c = ptd.m_aos[i].idata(1); + * const amrex::Real a = ptd.rdata(1)[i]; + * const amrex::Real b = ptd.rdata(2)[i]; + * const int c = ptd.idata(1)[i]; * return {a, b, c}; * }, reduce_ops); * diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index 80f7705e266..d7ba7a1f136 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -36,6 +36,8 @@ struct ParticleTileData using ParticleType = T_ParticleType; using ParticleRefType = T_ParticleType&; using Self = ParticleTileData; + using RealType = ParticleReal; + using IntType = int; static constexpr int NStructReal = ParticleType::NReal; static constexpr int NStructInt = ParticleType::NInt; @@ -60,7 +62,7 @@ struct ParticleTileData int* AMREX_RESTRICT * AMREX_RESTRICT m_runtime_idata; [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ParticleReal& pos (const int dir, const int index) const & + decltype(auto) pos (const int dir, const int index) const & { if constexpr(!ParticleType::is_soa_particle) { return this->m_aos[index].pos(dir); @@ -95,7 +97,7 @@ struct ParticleTileData if constexpr(ParticleType::is_soa_particle) { return this->m_idcpu[index]; } else { - amrex::Abort("not implemented"); + return this->m_aos[index].idcpu(); } } @@ -329,6 +331,7 @@ struct alignas(sizeof(double)) ConstSoAParticle : SoAParticleBase static constexpr bool is_constsoa_particle = true; using RealType = ParticleReal; + using IntType = int; AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE ConstSoAParticle (ConstPTD const& ptd, long i) : // Note: should this be int instead? @@ -374,7 +377,7 @@ struct alignas(sizeof(double)) ConstSoAParticle : SoAParticleBase private : - static_assert(std::is_trivially_copyable>(), "ParticleTileData is not trivially copyable"); + static_assert(std::is_trivially_copyable(), "ParticleTileData is not trivially copyable"); ConstPTD m_constparticle_tile_data; int m_index; @@ -392,6 +395,7 @@ struct alignas(sizeof(double)) SoAParticle : SoAParticleBase using ConstType = ConstSoAParticle; using RealType = ParticleReal; + using IntType = int; AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE SoAParticle (PTD const& ptd, long i) : // Note: should this be int instead? @@ -456,7 +460,7 @@ struct alignas(sizeof(double)) SoAParticle : SoAParticleBase private : - static_assert(std::is_trivially_copyable>(), "ParticleTileData is not trivially copyable"); + static_assert(std::is_trivially_copyable(), "ParticleTileData is not trivially copyable"); PTD m_particle_tile_data; int m_index; @@ -511,6 +515,8 @@ struct ConstParticleTileData static constexpr int NAI = NArrayInt; using ParticleType = T_ParticleType; using ParticleRefType = T_ParticleType const&; + using RealType = ParticleReal; + using IntType = int; static constexpr int NStructReal = ParticleType::NReal; static constexpr int NStructInt = ParticleType::NInt; @@ -530,7 +536,7 @@ struct ConstParticleTileData GpuArray m_idata; [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - const ParticleReal& pos (const int dir, const int index) const & + decltype(auto) pos (const int dir, const int index) const & { if constexpr(!ParticleType::is_soa_particle) { return this->m_aos[index].pos(dir); @@ -565,7 +571,7 @@ struct ConstParticleTileData if constexpr(ParticleType::is_soa_particle) { return this->m_idcpu[index]; } else { - amrex::Abort("not implemented"); + return this->m_aos[index].idcpu(); } } diff --git a/Src/Particle/AMReX_WriteBinaryParticleData.H b/Src/Particle/AMReX_WriteBinaryParticleData.H index 3c03efe076b..4a5117a1d29 100644 --- a/Src/Particle/AMReX_WriteBinaryParticleData.H +++ b/Src/Particle/AMReX_WriteBinaryParticleData.H @@ -47,8 +47,12 @@ fillFlags (Container& pflags, const PTile& ptile, F const& f) amrex::ignore_unused(flag_ptr, f, engine); if constexpr (IsCallable::value) { flag_ptr[k] = f(p,engine); - } else { + } else if constexpr (IsCallable::value) { flag_ptr[k] = f(p); + } else if constexpr (IsCallable::value) { + flag_ptr[k] = f(ptd,k,engine); + } else { + flag_ptr[k] = f(ptd,k); } }); } @@ -68,8 +72,12 @@ fillFlags (Container& pflags, const PTile& ptile, F const& f) const auto p = ptd.getSuperParticle(k); if constexpr (IsCallable::value) { flag_ptr[k] = f(p,getInvalidRandomEngine()); - } else { + } else if constexpr (IsCallable::value) { flag_ptr[k] = f(p); + } else if constexpr (IsCallable::value) { + flag_ptr[k] = f(ptd,k,getInvalidRandomEngine()); + } else { + flag_ptr[k] = f(ptd,k); } } } @@ -150,22 +158,98 @@ countFlags (const Container& pflags) return nparticles; } -template +template AMREX_GPU_HOST_DEVICE -void packParticleIDs (I* idata, const P& p, bool is_checkpoint) noexcept +void packParticleIDs (I* idata, const std::uint64_t idcpu, bool is_checkpoint) noexcept { if (is_checkpoint) { std::int32_t xi, yi; std::uint32_t xu, yu; - xu = (std::uint32_t)((p.m_idcpu & 0xFFFFFFFF00000000LL) >> 32); - yu = (std::uint32_t)( p.m_idcpu & 0xFFFFFFFFLL); + xu = (std::uint32_t)((idcpu & 0xFFFFFFFF00000000LL) >> 32); + yu = (std::uint32_t)( idcpu & 0xFFFFFFFFLL); amrex::Gpu::memcpy(&xi, &xu, sizeof(xu)); amrex::Gpu::memcpy(&yi, &yu, sizeof(yu)); idata[0] = xi; idata[1] = yi; } else { - idata[0] = p.id(); - idata[1] = p.cpu(); + idata[0] = ConstParticleIDWrapper{idcpu}; + idata[1] = ConstParticleCPUWrapper{idcpu}; + } +} + +template +AMREX_GPU_HOST_DEVICE void +rPackParticleData (const PTD& ptd, int idx, typename PTD::RealType * rdata_ptr, + const int * write_real_comp) +{ + std::size_t rout_index = 0; + + for (int j = 0; j < AMREX_SPACEDIM; ++j) { + rdata_ptr[rout_index] = ptd.pos(j, idx); + rout_index++; + } + + if constexpr (!PTD::ParticleType::is_soa_particle) { + const auto& p = ptd[idx]; + + for (int j = 0; j < PTD::ParticleType::NReal; ++j) { + if (write_real_comp[j]) { + rdata_ptr[rout_index] = p.rdata(j); + rout_index++; + } + } + } + + constexpr int real_start_offset = PTD::ParticleType::is_soa_particle ? AMREX_SPACEDIM : 0; + + for (int j = real_start_offset; j < PTD::NAR; ++j) { + if (write_real_comp[PTD::ParticleType::NReal + j - real_start_offset]) { + rdata_ptr[rout_index] = ptd.rdata(j)[idx]; + rout_index++; + } + } + + for (int j = 0; j < ptd.m_num_runtime_real; ++j) { + if (write_real_comp[PTD::ParticleType::NReal + PTD::NAR + j - real_start_offset]) { + rdata_ptr[rout_index] = ptd.m_runtime_rdata[j][idx]; + rout_index++; + } + } +} + +template +AMREX_GPU_HOST_DEVICE void +iPackParticleData (const PTD& ptd, int idx, typename PTD::IntType * idata_ptr, + const int * write_int_comp, bool is_checkpoint) +{ + std::size_t iout_index = 0; + + packParticleIDs(&idata_ptr[iout_index], ptd.idcpu(idx), is_checkpoint); + iout_index += 2; + + if constexpr (!PTD::ParticleType::is_soa_particle) { + const auto& p = ptd[idx]; + + for (int j = 0; j < PTD::ParticleType::NInt; ++j) { + if (write_int_comp[j]) { + idata_ptr[iout_index] = p.idata(j); + iout_index++; + } + } + } + + for (int j = 0; j < PTD::NAI; ++j) { + if (write_int_comp[PTD::ParticleType::NInt + j]) { + idata_ptr[iout_index] = ptd.idata(j)[idx]; + iout_index++; + } + } + + for (int j = 0; j < ptd.m_num_runtime_int; ++j) { + if (write_int_comp[PTD::ParticleType::NInt + PTD::NAI + j]) { + idata_ptr[iout_index] = ptd.m_runtime_idata[j][idx]; + iout_index++; + } } } @@ -214,58 +298,22 @@ packIOData (Vector& idata, Vector& rdata, const PC& pc, int l typename PC::RealVector rdata_d(num_copies*rChunkSize); const auto flag_ptr = pflags.data(); + const auto offset_ptr = offsets.data(); auto idata_d_ptr = idata_d.data(); auto rdata_d_ptr = rdata_d.data(); const auto& ptd = ptile.getConstParticleTileData(); - amrex::ParallelFor(num_copies, + amrex::ParallelFor(ptile.numParticles(), [=] AMREX_GPU_DEVICE (int pindex) noexcept { - // might be worth using shared memory here - const auto p = ptd.getSuperParticle(pindex); - if (flag_ptr[pindex]) { - std::size_t iout_index = pindex*iChunkSize; - packParticleIDs(&idata_d_ptr[iout_index], p, is_checkpoint); - iout_index += 2; - - std::size_t rout_index = pindex*rChunkSize; - for (int j = 0; j < AMREX_SPACEDIM; j++) { - rdata_d_ptr[rout_index] = p.pos(j); - rout_index++; - } - - for (int j = 0; j < PC::SuperParticleType::NInt; j++) { - if (write_int_comp_d_ptr[j]) { - idata_d_ptr[iout_index] = p.idata(j); - iout_index++; - } - } + const int out_indx = offset_ptr[pindex]; + iPackParticleData(ptd, pindex, idata_d_ptr + out_indx * iChunkSize, + write_int_comp_d_ptr, is_checkpoint); - for (int j = 0; j < ptd.m_num_runtime_int; j++) { - if (write_int_comp_d_ptr[PC::SuperParticleType::NInt + j]) { - idata_d_ptr[iout_index] = ptd.m_runtime_idata[j][pindex]; - iout_index++; - } - } - - // extra SoA Real components - const int real_start_offset = PC::ParticleType::is_soa_particle ? AMREX_SPACEDIM : 0; // pure SoA: skip positions - for (int j = real_start_offset; j < PC::SuperParticleType::NReal; j++) { - const int write_comp_index = j-real_start_offset; - if (write_real_comp_d_ptr[write_comp_index]) { - rdata_d_ptr[rout_index] = p.rdata(j); - rout_index++; - } - } - - for (int j = 0; j < ptd.m_num_runtime_real; j++) { - if (write_real_comp_d_ptr[PC::SuperParticleType::NReal+j-real_start_offset]) { - rdata_d_ptr[rout_index] = ptd.m_runtime_rdata[j][pindex]; - rout_index++; - } - } + rPackParticleData(ptd, pindex, rdata_d_ptr + out_indx * rChunkSize, + write_real_comp_d_ptr); } }); @@ -307,84 +355,17 @@ packIOData (Vector& idata, Vector& rdata, const PC& pc, int l for (int tile : tiles) { const auto& ptile = pc.ParticlesAt(lev, grid, tile); const auto& pflags = particle_io_flags[lev].at(std::make_pair(grid, tile)); + const auto& ptd = ptile.getConstParticleTileData(); + for (int pindex = 0; pindex < ptile.numParticles(); ++pindex) { if (pflags[pindex]) { - const auto& soa = ptile.GetStructOfArrays(); - - // note: for pure SoA particle layouts, we do write the id, cpu and positions as a struct - // for backwards compatibility with readers - if constexpr(!PC::ParticleType::is_soa_particle) - { - const auto& aos = ptile.GetArrayOfStructs(); - const auto& p = aos[pindex]; - - // Int: id, cpu - packParticleIDs(iptr, p, is_checkpoint); - iptr += 2; - - // Real: positions - for (int j = 0; j < AMREX_SPACEDIM; j++) { rptr[j] = p.pos(j); } - rptr += AMREX_SPACEDIM; - - // extra AoS Int components - for (int j = 0; j < PC::NStructInt; j++) { - if (write_int_comp[j]) { - *iptr = p.idata(j); - ++iptr; - } - } - // extra AoS Real components - for (int j = 0; j < PC::NStructReal; j++) { - if (write_real_comp[j]) { - *rptr = p.rdata(j); - ++rptr; - } - } - } - else { - uint64_t idcpu = soa.GetIdCPUData()[pindex]; - if (is_checkpoint) { - std::int32_t xi, yi; - std::uint32_t xu, yu; - xu = (std::uint32_t)((idcpu & 0xFFFFFFFF00000000LL) >> 32); - yu = (std::uint32_t)( idcpu & 0xFFFFFFFFLL); - std::memcpy(&xi, &xu, sizeof(xu)); - std::memcpy(&yi, &yu, sizeof(yu)); - *iptr = xi; - iptr += 1; - *iptr = yi; - iptr += 1; - } else { - // Int: id, cpu - *iptr = (int) ParticleIDWrapper(idcpu); - iptr += 1; - *iptr = (int) ParticleCPUWrapper(idcpu); - iptr += 1; - } - - // Real: position - for (int j = 0; j < AMREX_SPACEDIM; j++) { rptr[j] = soa.GetRealData(j)[pindex]; } - rptr += AMREX_SPACEDIM; - } + iPackParticleData(ptd, pindex, iptr, + write_int_comp.dataPtr(), is_checkpoint); + iptr += iChunkSize; - // SoA int data - const int int_start_offset = 0; - for (int j = int_start_offset; j < pc.NumIntComps(); j++) { - if (write_int_comp[PC::NStructInt+j]) { - *iptr = soa.GetIntData(j)[pindex]; - ++iptr; - } - } - - // extra SoA Real components - const int real_start_offset = PC::ParticleType::is_soa_particle ? AMREX_SPACEDIM : 0; // pure SoA: skip positions - for (int j = real_start_offset; j < pc.NumRealComps(); j++) { - const int write_comp_index = PC::NStructReal+j-real_start_offset; - if (write_real_comp[write_comp_index]) { - *rptr = (ParticleReal) soa.GetRealData(j)[pindex]; - ++rptr; - } - } + rPackParticleData(ptd, pindex, rptr, + write_real_comp.dataPtr()); + rptr += rChunkSize; } } } @@ -875,7 +856,6 @@ void WriteBinaryParticleDataAsync (PC const& pc, dms.push_back(pc.ParticleDistributionMap(lev)); } - int nrc = pc.NumRealComps(); int nic = pc.NumIntComps(); int rnames_size = (int) real_comp_names.size(); @@ -1028,62 +1008,11 @@ void WriteBinaryParticleDataAsync (PC const& pc, const auto& ptd = pbox.getConstParticleTileData(); for (int pindex = 0; pindex < pbox.numParticles(); ++pindex) { - const auto& soa = pbox.GetStructOfArrays(); - - make_particle mp{}; - const auto& p = mp(ptd, pindex); - if (p.id() <= 0) { continue; } - - // note: for pure SoA particle layouts, we do write the id, cpu and positions as a struct - // for backwards compatibility with readers - if constexpr(!PC::ParticleType::is_soa_particle) - { - // Ints: id, cpu - particle_detail::packParticleIDs(iptr, p, is_checkpoint); - iptr += 2; - - // extra AoS Int components - for (int j = 0; j < NStructInt; j++) - { - if (write_int_comp[j]) - { - *iptr = p.idata(j); - ++iptr; - } - } - } - else { - uint64_t idcpu = soa.GetIdCPUData()[pindex]; - if (is_checkpoint) { - std::int32_t xi, yi; - std::uint32_t xu, yu; - xu = (std::uint32_t)((idcpu & 0xFFFFFFFF00000000LL) >> 32); - yu = (std::uint32_t)( idcpu & 0xFFFFFFFFLL); - std::memcpy(&xi, &xu, sizeof(xu)); - std::memcpy(&yi, &yu, sizeof(yu)); - *iptr = xi; - iptr += 1; - *iptr = yi; - iptr += 1; - } else { - // Int: id, cpu - *iptr = (int) ParticleIDWrapper(idcpu); - iptr += 1; - *iptr = (int) ParticleCPUWrapper(idcpu); - iptr += 1; - } - } + if (ptd.id(pindex) <= 0) { continue; } - // extra SoA Ints - const int int_start_offset = 0; - for (int j = int_start_offset; j < nic; j++) - { - if (write_int_comp[NStructInt+j]) - { - *iptr = soa.GetIntData(j)[pindex]; - ++iptr; - } - } + particle_detail::iPackParticleData(ptd, pindex, iptr, + write_int_comp.dataPtr(), is_checkpoint); + iptr += iChunkSize; } } @@ -1104,49 +1033,13 @@ void WriteBinaryParticleDataAsync (PC const& pc, auto ptile_index = std::make_pair(grid, tile_map[grid][i]); const auto& pbox = (*myptiles)[lev][ptile_index]; const auto& ptd = pbox.getConstParticleTileData(); - for (int pindex = 0; - pindex < pbox.numParticles(); ++pindex) + for (int pindex = 0; pindex < pbox.numParticles(); ++pindex) { - const auto& soa = pbox.GetStructOfArrays(); - make_particle mp{}; - const auto& p = mp(ptd, pindex); - - if (p.id() <= 0) { continue; } - - if constexpr(!PC::ParticleType::is_soa_particle) - { - // Real: position - for (int j = 0; j < AMREX_SPACEDIM; j++) { rptr[j] = p.pos(j); } - rptr += AMREX_SPACEDIM; - - // extra AoS real - for (int j = 0; j < NStructReal; j++) - { - if (write_real_comp[j]) - { - *rptr = p.rdata(j); - ++rptr; - } - } - } - else { - // Real: position - for (int j = 0; j < AMREX_SPACEDIM; j++) { rptr[j] = soa.GetRealData(j)[pindex]; } - rptr += AMREX_SPACEDIM; - } + if (ptd.id(pindex) <= 0) { continue; } - // extra SoA real - const int real_start_offset = PC::ParticleType::is_soa_particle ? AMREX_SPACEDIM : 0; // pure SoA: positions - for (int j = real_start_offset; j < nrc; j++) - { - const int write_comp_offset = PC::ParticleType::is_soa_particle ? AMREX_SPACEDIM : 0; // pure SoA: skip positions - const int write_comp_index = PC::NStructReal+j-write_comp_offset; - if (write_real_comp[write_comp_index]) - { - *rptr = (typename PC::ParticleType::RealType) soa.GetRealData(j)[pindex]; - ++rptr; - } - } + particle_detail::rPackParticleData(ptd, pindex, rptr, + write_real_comp.dataPtr()); + rptr += rChunkSize; } } diff --git a/Tests/Particles/CheckpointRestartSOA/main.cpp b/Tests/Particles/CheckpointRestartSOA/main.cpp index e3c544516e2..af2ce0f7a0b 100644 --- a/Tests/Particles/CheckpointRestartSOA/main.cpp +++ b/Tests/Particles/CheckpointRestartSOA/main.cpp @@ -150,21 +150,21 @@ void test () std::snprintf(directory_path, sizeof directory_path, "%s%s", directory.c_str(), "plt00000"); newPC.Restart(directory_path, "particle0"); - using PType = typename MyPC::SuperParticleType; + using ConstPTDType = typename MyPC::ConstPTDType; for (int icomp=0; icomp Real + [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> Real { - return p.rdata(icomp); + return ptd.rdata(icomp)[i]; }); auto sm_old = amrex::ReduceSum(myPC, - [=] AMREX_GPU_HOST_DEVICE (const PType& p) -> Real + [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> Real { - return p.rdata(icomp); + return ptd.rdata(icomp)[i]; }); ParallelDescriptor::ReduceRealSum(sm_new); @@ -178,15 +178,15 @@ void test () { amrex::Print() << "working on comp " << icomp << "\n"; auto sm_new = amrex::ReduceSum(newPC, - [=] AMREX_GPU_HOST_DEVICE (const PType& p) -> Real + [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> Real { - return p.idata(icomp); + return ptd.idata(icomp)[i]; }); auto sm_old = amrex::ReduceSum(myPC, - [=] AMREX_GPU_HOST_DEVICE (const PType& p) -> Real + [=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, const int i) -> Real { - return p.idata(icomp); + return ptd.idata(icomp)[i]; }); ParallelDescriptor::ReduceRealSum(sm_new);