Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
d1f40e8
Implemented kspace_modify isend yes for non-blocking MPI_Isend commun…
hagertnl Nov 21, 2025
5c6666a
Change isend keyword to nonblocking for KSPACE
hagertnl Nov 24, 2025
7715f2b
Add error if both collective and nonblocking are specified
hagertnl Nov 24, 2025
8ca245b
Merge Kokkos-based KSPACE comms improvements into non-Kokkos based, a…
hagertnl Nov 26, 2025
afc4c27
Add 0 for use_isend parameter of FFT3d init in AMOEBA
hagertnl Nov 26, 2025
8b8d1ba
Added use_isend param=0 for FFT3d init in ELECTRODE and PHONON packages
hagertnl Nov 26, 2025
107c2b9
Added usenonblocking param to FFT3D unit tests
hagertnl Dec 1, 2025
d145b1d
Fix bug in Kokkos Remap, where conditionals were ordered incorrectly
hagertnl Dec 3, 2025
9e348d5
Replace Allreduce-based collective commringlist building with C++ set…
hagertnl Dec 3, 2025
1eb6810
Update remap in Kokkos KSPACE to use set instead of MPI_Allreduce
hagertnl Dec 3, 2025
1ea586c
Added docs for nonblocking keyword for kspace_modify
hagertnl Dec 3, 2025
0df30a3
Added defaults and restriction for collective/nonblocking keywords
hagertnl Dec 3, 2025
9df185f
Removed plan->self usage from collectives, since it's faster for self…
hagertnl Dec 9, 2025
cec67c8
Merge branch 'develop' into nick-pppm-performance-improvements
akohlmey Dec 10, 2025
f89472b
Rename flag
stanmoore1 Dec 10, 2025
b0e6197
simplify handling of collective and nonblocking exclusivity
akohlmey Dec 10, 2025
033af47
add version tag
akohlmey Dec 10, 2025
f98de9e
avoid memory leaks and correct call to free plan
akohlmey Dec 10, 2025
eb1f7aa
Merge pull request #4789 from hagertnl/nick-pppm-performance-improvem…
akohlmey Dec 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions doc/src/kspace_modify.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@ Syntax
kspace_modify keyword value ...

* one or more keyword/value pairs may be listed
* keyword = *collective* or *compute* or *cutoff/adjust* or *diff* or *disp/auto* or *fftbench* or *force/disp/kspace* or *force/disp/real* or *force* or *gewald/disp* or *gewald* or *kmax/ewald* or *mesh* or *minorder* or *mix/disp* or *order/disp* or *order* or *overlap* or *scafacos* or *slab* or *splittol* or *wire*
* keyword = *collective* or *nonblocking* or *compute* or *cutoff/adjust* or *diff* or *disp/auto* or *fftbench* or *force/disp/kspace* or *force/disp/real* or *force* or *gewald/disp* or *gewald* or *kmax/ewald* or *mesh* or *minorder* or *mix/disp* or *order/disp* or *order* or *overlap* or *scafacos* or *slab* or *splittol* or *wire*

.. parsed-literal::

*collective* value = *yes* or *no*
*nonblocking* value = *yes* or *no*
*compute* value = *yes* or *no*
*cutoff/adjust* value = *yes* or *no*
*diff* value = *ad* or *ik* = 2 or 4 FFTs for PPPM in smoothed or non-smoothed mode
Expand Down Expand Up @@ -86,6 +87,18 @@ collective operations and adequate hardware.

----------

.. versionadded:: 10Dec2025

The *nonblocking* keyword applies only to PPPM. It is set to *no* by
default. If this option is set to *yes*, LAMMPS will use non-blocking
point-to-point MPI operations to remap data for 3d-FFT operations
instead of the default blocking point-to-point communication. This
allows for better utilization of the available network bandwidth by
overlapping communication with multiple other ranks at the same time,
as well as by overlapping the receiving and unpacking of data with the
sending of data.

----------

The *compute* keyword allows Kspace computations to be turned off,
even though a :doc:`kspace_style <kspace_style>` is defined. This is
not useful for running a real simulation, but can be useful for
Expand Down Expand Up @@ -440,7 +453,9 @@ parameters, see the :doc:`Howto dispersion <Howto_dispersion>` doc page.
Restrictions
""""""""""""

none
The *collective* and *nonblocking* keywords are mutually exclusive and
cannot both be enabled at the same time. Setting one of them to *yes*
automatically sets the other to *no*, so whichever of the two keywords
is enabled last takes effect.

Related commands
""""""""""""""""
Expand All @@ -452,6 +467,8 @@ Default

The option defaults are as follows:

* collective = no
* nonblocking = no
* compute = yes
* cutoff/adjust = yes (MSM)
* diff = ik (PPPM)
Expand Down
6 changes: 3 additions & 3 deletions src/AMOEBA/amoeba_convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,17 +173,17 @@ void AmoebaConvolution::allocate_grid()
fft1 = new FFT3d(lmp,world,nx,ny,nz,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
1,0,&tmp,0);
1,0,&tmp,0,0);

fft2 = new FFT3d(lmp,world,nx,ny,nz,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
0,0,&tmp,0);
0,0,&tmp,0,0);

remap = new Remap(lmp,world,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
nqty,0,0,FFT_PRECISION,0);
nqty,0,0,FFT_PRECISION,0,0);

// memory allocations

Expand Down
6 changes: 3 additions & 3 deletions src/ELECTRODE/pppm_electrode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1069,15 +1069,15 @@ void PPPMElectrode::allocate()

fft1 = new FFT3d(lmp, world, nx_pppm, ny_pppm, nz_pppm, nxlo_fft, nxhi_fft, nylo_fft, nyhi_fft,
nzlo_fft, nzhi_fft, nxlo_fft, nxhi_fft, nylo_fft, nyhi_fft, nzlo_fft, nzhi_fft,
0, 0, &tmp, collective_flag);
0, 0, &tmp, collective_flag, 0);

fft2 = new FFT3d(lmp, world, nx_pppm, ny_pppm, nz_pppm, nxlo_fft, nxhi_fft, nylo_fft, nyhi_fft,
nzlo_fft, nzhi_fft, nxlo_in, nxhi_in, nylo_in, nyhi_in, nzlo_in, nzhi_in, 0, 0,
&tmp, collective_flag);
&tmp, collective_flag, 0);

remap = new Remap(lmp, world, nxlo_in, nxhi_in, nylo_in, nyhi_in, nzlo_in, nzhi_in, nxlo_fft,
nxhi_fft, nylo_fft, nyhi_fft, nzlo_fft, nzhi_fft, 1, 0, 0, FFT_PRECISION,
collective_flag);
collective_flag, 0);

// ELECTRODE specific allocations

Expand Down
15 changes: 8 additions & 7 deletions src/KOKKOS/fft3d_kokkos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ FFT3dKokkos<DeviceType>::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int
int out_ilo, int out_ihi, int out_jlo, int out_jhi,
int out_klo, int out_khi,
int scaled, int permute, int *nbuf, int usecollective,
int usegpu_aware) :
int usenonblocking, int usegpu_aware) :
Pointers(lmp)
{
int nthreads = lmp->kokkos->nthreads;
Expand Down Expand Up @@ -81,7 +81,7 @@ FFT3dKokkos<DeviceType>::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int
plan = fft_3d_create_plan_kokkos(comm,nfast,nmid,nslow,
in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi,
out_ilo,out_ihi,out_jlo,out_jhi,out_klo,out_khi,
scaled,permute,nbuf,usecollective,nthreads,usegpu_aware);
scaled,permute,nbuf,usecollective,usenonblocking,nthreads,usegpu_aware);
if (plan == nullptr) error->one(FLERR,"Could not create 3d FFT plan");
}

Expand Down Expand Up @@ -400,6 +400,7 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
2 = permute twice = slow->fast, fast->mid, mid->slow
nbuf returns size of internal storage buffers used by FFT
usecollective use collective MPI operations for remapping data
usenonblocking use non-blocking MPI_Isend when remapping data with point-to-point MPI
usegpu_aware use GPU-Aware MPI or not
------------------------------------------------------------------------- */

Expand All @@ -411,7 +412,7 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
int out_ilo, int out_ihi, int out_jlo, int out_jhi,
int out_klo, int out_khi,
int scaled, int permute, int *nbuf, int usecollective,
int nthreads, int usegpu_aware)
int usenonblocking, int nthreads, int usegpu_aware)
{
struct fft_plan_3d_kokkos<DeviceType> *plan;
int me,nprocs;
Expand Down Expand Up @@ -468,7 +469,7 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
remapKK->remap_3d_create_plan_kokkos(comm,in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi,
first_ilo,first_ihi,first_jlo,first_jhi,
first_klo,first_khi,2,0,0,FFT_PRECISION,
usecollective,usegpu_aware);
usecollective,usenonblocking,usegpu_aware);
if (plan->pre_plan == nullptr) return nullptr;
}

Expand All @@ -493,7 +494,7 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
first_klo,first_khi,
second_ilo,second_ihi,second_jlo,second_jhi,
second_klo,second_khi,2,1,0,FFT_PRECISION,
usecollective,usegpu_aware);
usecollective,usenonblocking,usegpu_aware);
if (plan->mid1_plan == nullptr) return nullptr;

// 1d FFTs along mid axis
Expand Down Expand Up @@ -534,7 +535,7 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
second_ilo,second_ihi,
third_jlo,third_jhi,third_klo,third_khi,
third_ilo,third_ihi,2,1,0,FFT_PRECISION,
usecollective,usegpu_aware);
usecollective,usenonblocking,usegpu_aware);
if (plan->mid2_plan == nullptr) return nullptr;

// 1d FFTs along slow axis
Expand Down Expand Up @@ -562,7 +563,7 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
third_jlo,third_jhi,
out_klo,out_khi,out_ilo,out_ihi,
out_jlo,out_jhi,2,(permute+1)%3,0,FFT_PRECISION,
usecollective,usegpu_aware);
usecollective,usenonblocking,usegpu_aware);
if (plan->post_plan == nullptr) return nullptr;
}

Expand Down
4 changes: 2 additions & 2 deletions src/KOKKOS/fft3d_kokkos.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class FFT3dKokkos : protected Pointers {

FFT3dKokkos(class LAMMPS *, MPI_Comm,
int,int,int,int,int,int,int,int,int,int,int,int,int,int,int,
int,int,int *,int,int);
int,int,int *,int,int,int);
~FFT3dKokkos() override;
void compute(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, int);
void timing1d(typename FFT_AT::t_FFT_SCALAR_1d, int, int);
Expand All @@ -115,7 +115,7 @@ class FFT3dKokkos : protected Pointers {
struct fft_plan_3d_kokkos<DeviceType> *fft_3d_create_plan_kokkos(MPI_Comm, int, int, int,
int, int, int, int, int,
int, int, int, int, int, int, int,
int, int, int *, int, int, int);
int, int, int *, int, int, int, int);

void fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokkos<DeviceType> *);

Expand Down
7 changes: 4 additions & 3 deletions src/KOKKOS/pppm_kokkos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -833,22 +833,23 @@ void PPPMKokkos<DeviceType>::allocate()
// remap takes data from 3d brick to FFT decomposition

int collective_flag = force->kspace->collective_flag;
int nonblocking_flag = force->kspace->nonblocking_flag;
int gpu_aware_flag = lmp->kokkos->gpu_aware_flag;
int tmp;

fft1 = new FFT3dKokkos<DeviceType>(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
0,0,&tmp,collective_flag,gpu_aware_flag);
0,0,&tmp,collective_flag,nonblocking_flag,gpu_aware_flag);

fft2 = new FFT3dKokkos<DeviceType>(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
0,0,&tmp,collective_flag,gpu_aware_flag);
0,0,&tmp,collective_flag,nonblocking_flag,gpu_aware_flag);
remap = new RemapKokkos<DeviceType>(lmp,world,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
1,0,0,FFT_PRECISION,collective_flag,gpu_aware_flag);
1,0,0,FFT_PRECISION,collective_flag,nonblocking_flag,gpu_aware_flag);
}

/* ----------------------------------------------------------------------
Expand Down
Loading