Skip to content

Commit bf9e2d9

Browse files
Reduce use of SuperParticle (#4526)
## Summary In preparation for #4404, this PR reduces the reliance on SuperParticle. The issue with getSuperParticle is that the compiler is instructed to read all of the particle's data into registers even if most of it is not needed. Additionally, runtime data is not accessible through SuperParticle. ## Additional background The previous version of packIOData with RunOnGpu had a bug when some particles were invalid. ## Checklist The proposed changes: - [x] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate --------- Co-authored-by: Andrew Myers <[email protected]>
1 parent 1a59c5e commit bf9e2d9

File tree

9 files changed

+274
-361
lines changed

9 files changed

+274
-361
lines changed

.github/workflows/gcc.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ jobs:
376376
# /home/runner/work/amrex/amrex/Src/Base/AMReX_IntVect.H:194:92: error: array subscript -1 is below array bounds of ‘int [3]’ [-Werror=array-bounds]
377377
# int& operator[] (int i) noexcept { BL_ASSERT(i>=0 && i < AMREX_SPACEDIM); return vect[i]; }
378378
#
379-
env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches -Wmissing-include-dirs -Wno-array-bounds"}
379+
env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches -Wmissing-include-dirs -Wno-array-bounds"}
380380
run: |
381381
export CCACHE_COMPRESS=1
382382
export CCACHE_COMPRESSLEVEL=10

Src/Extern/HDF5/AMReX_ParticleHDF5.H

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,9 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
6666
WriteHDF5ParticleData(dir, name, write_real_comp, write_int_comp,
6767
tmp_real_comp_names, tmp_int_comp_names,
6868
compression,
69-
[=] AMREX_GPU_HOST_DEVICE (const SuperParticleType& p) -> int
69+
[=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, int i) -> int
7070
{
71-
return p.id() > 0;
71+
return ptd.id(i) > 0;
7272
}, true);
7373
}
7474

@@ -102,9 +102,9 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
102102
WriteHDF5ParticleData(dir, name, write_real_comp, write_int_comp,
103103
real_comp_names, int_comp_names,
104104
compression,
105-
[=] AMREX_GPU_HOST_DEVICE (const SuperParticleType& p) -> int
105+
[=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, int i) -> int
106106
{
107-
return p.id() > 0;
107+
return ptd.id(i) > 0;
108108
});
109109
}
110110

@@ -138,9 +138,9 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
138138
WriteHDF5ParticleData(dir, name, write_real_comp, write_int_comp,
139139
real_comp_names, int_comp_names,
140140
compression,
141-
[=] AMREX_GPU_HOST_DEVICE (const SuperParticleType& p)
141+
[=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, int i)
142142
{
143-
return p.id() > 0;
143+
return ptd.id(i) > 0;
144144
});
145145
}
146146

@@ -166,9 +166,9 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
166166
write_real_comp, write_int_comp,
167167
real_comp_names, int_comp_names,
168168
compression,
169-
[=] AMREX_GPU_HOST_DEVICE (const SuperParticleType& p)
169+
[=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, int i)
170170
{
171-
return p.id() > 0;
171+
return ptd.id(i) > 0;
172172
});
173173
}
174174

@@ -200,9 +200,9 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
200200
write_real_comp, write_int_comp,
201201
real_comp_names, int_comp_names,
202202
compression,
203-
[=] AMREX_GPU_HOST_DEVICE (const ParticleType& p)
203+
[=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, int i)
204204
{
205-
return p.id() > 0;
205+
return ptd.id(i) > 0;
206206
});
207207
}
208208

@@ -238,9 +238,9 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
238238
WriteHDF5ParticleData(dir, name, write_real_comp, write_int_comp,
239239
real_comp_names, int_comp_names,
240240
compression,
241-
[=] AMREX_GPU_HOST_DEVICE (const SuperParticleType& p)
241+
[=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, int i)
242242
{
243-
return p.id() > 0;
243+
return ptd.id(i) > 0;
244244
});
245245
}
246246

@@ -261,9 +261,9 @@ WritePlotFileHDF5 (const std::string& dir, const std::string& name,
261261
write_real_comp, write_int_comp,
262262
real_comp_names, int_comp_names,
263263
compression,
264-
[=] AMREX_GPU_HOST_DEVICE (const SuperParticleType& p)
264+
[=] AMREX_GPU_HOST_DEVICE (const ConstPTDType& ptd, int i)
265265
{
266-
return p.id() > 0;
266+
return ptd.id(i) > 0;
267267
});
268268
}
269269

Src/Particle/AMReX_Particle.H

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ struct alignas(sizeof(double)) Particle
337337

338338
//! The floating point type used for the particles.
339339
using RealType = ParticleReal;
340+
using IntType = int;
340341

341342
static Long the_next_id;
342343

@@ -366,6 +367,12 @@ struct alignas(sizeof(double)) Particle
366367
#endif
367368
}
368369

370+
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
371+
uint64_t& idcpu () & { return this->m_idcpu; }
372+
373+
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
374+
const uint64_t& idcpu () const & { return this->m_idcpu; }
375+
369376
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
370377
RealVect pos () const & {return RealVect(AMREX_D_DECL(this->m_pos[0], this->m_pos[1], this->m_pos[2]));}
371378

Src/Particle/AMReX_ParticleContainer.H

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,8 @@ public:
185185
//! A single level worth of particles is indexed (grid id, tile id)
186186
//! for both SoA and AoS data.
187187
using ParticleLevel = std::map<std::pair<int, int>, ParticleTileType>;
188+
using PTDType = typename ParticleTileType::ParticleTileDataType;
189+
using ConstPTDType = typename ParticleTileType::ConstParticleTileDataType;
188190
using AoS = typename ParticleTileType::AoS;
189191
using SoA = typename ParticleTileType::SoA;
190192

Src/Particle/AMReX_ParticleContainerI.H

Lines changed: 41 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -954,42 +954,47 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
954954
auto dst = virts.getParticleTileData();
955955
ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k)
956956
{
957-
if(imf_arr(i,j,k,0)!=0)
958-
{
959-
SuperParticleType p;
960-
p.cpu() = 0;
961-
p.id() = LongParticleIds::VirtualParticleID;
962-
963-
//Set rdata(0) first so we can normalize the weighted fields
964-
p.rdata(0) = static_cast<ParticleReal>(partData(i,j,k,AMREX_SPACEDIM));
965-
//Set pos with the normalized weighted field
966-
for (int n = 0; n < AMREX_SPACEDIM; ++n)
967-
{
968-
p.pos(n) = static_cast<ParticleReal>(partData(i,j,k,n) / p.rdata(0));
969-
}
970-
//Set rdata(n>0) with the normalized weighted field for NStructReal
971-
for (int n = 1; n < NStructReal; ++n)
972-
{
973-
p.rdata(n) = static_cast<ParticleReal>(partData(i,j,k,AMREX_SPACEDIM+n) / p.rdata(0));
974-
}
975-
//Set rdata(n>0) with the normalized weighted field for NArrayReal
976-
for (int n = 0; n < NArrayReal; ++n)
977-
{
978-
p.rdata(NStructReal+n) = static_cast<ParticleReal>(partData(i,j,k,AMREX_SPACEDIM+NStructReal+n));
979-
}
980-
//Set idata with the "first" particles idata field for NStructInt
981-
for (int n = 0; n < NStructInt; ++n)
982-
{
983-
p.idata(n) = imf_arr(i,j,k,1+n);
984-
}
985-
//Set idata with the "first" particles idata field for NArrayInt
986-
for (int n = 0; n < NArrayInt; ++n)
987-
{
988-
p.idata(NStructInt+n) = imf_arr(i,j,k,1+NStructInt+n);
989-
}
990-
//Set the new particle in dst tile
991-
dst.setSuperParticle(p, last_offset+offsets_ptr[((i-imf_arr.begin.x)+(j-imf_arr.begin.y)*imf_arr.jstride+(k-imf_arr.begin.z)*imf_arr.kstride)]);
992-
}
957+
if(imf_arr(i,j,k,0)!=0)
958+
{
959+
const auto idx = last_offset + offsets_ptr[
960+
((i-imf_arr.begin.x)
961+
+(j-imf_arr.begin.y)*imf_arr.jstride
962+
+(k-imf_arr.begin.z)*imf_arr.kstride)
963+
];
964+
965+
dst.cpu(idx) = 0;
966+
dst.id(idx) = LongParticleIds::VirtualParticleID;
967+
968+
auto& p = dst[idx];
969+
//Set rdata(0) first so we can normalize the weighted fields
970+
//Note that this does not work for soa PC
971+
p.rdata(0) = static_cast<ParticleReal>(partData(i,j,k,AMREX_SPACEDIM));;
972+
//Set pos with the normalized weighted field
973+
for (int n = 0; n < AMREX_SPACEDIM; ++n)
974+
{
975+
p.pos(n) = static_cast<ParticleReal>(partData(i,j,k,n) / p.rdata(0));
976+
}
977+
//Set rdata(n>0) with the normalized weighted field for NStructReal
978+
for (int n = 1; n < NStructReal; ++n)
979+
{
980+
p.rdata(n) = static_cast<ParticleReal>(partData(i,j,k,AMREX_SPACEDIM+n) / p.rdata(0));
981+
}
982+
//Set rdata(n>0) with the normalized weighted field for NArrayReal
983+
for (int n = 0; n < NArrayReal; ++n)
984+
{
985+
dst.rdata(n)[idx] = static_cast<ParticleReal>(partData(i,j,k,AMREX_SPACEDIM+NStructReal+n));
986+
}
987+
//Set idata with the "first" particles idata field for NStructInt
988+
for (int n = 0; n < NStructInt; ++n)
989+
{
990+
p.idata(n) = imf_arr(i,j,k,1+n);
991+
}
992+
//Set idata with the "first" particles idata field for NArrayInt
993+
for (int n = 0; n < NArrayInt; ++n)
994+
{
995+
dst.idata(n)[idx] = imf_arr(i,j,k,1+NStructInt+n);
996+
}
997+
}
993998

994999
});
9951000
last_offset+=next_offset;

0 commit comments

Comments
 (0)