LAMMPS GridComm patch

This commit is contained in:
Shern Tee
2022-02-25 08:13:16 +00:00
committed by Ludwig Ahrens-Iwers
parent c04f4a913f
commit 6e74d0a09a
4 changed files with 177 additions and 80 deletions

View File

@ -579,7 +579,6 @@ void PPPMElectrode::start_compute()
void PPPMElectrode::compute_vector(bigint *imat, double *vec)
{
start_compute();
double const scaleinv = 1.0 / (nx_pppm * ny_pppm * nz_pppm);
// temporarily store and switch pointers so we can use brick2fft() for
// electrolyte density (without writing an additional function)
@ -602,7 +601,7 @@ void PPPMElectrode::compute_vector(bigint *imat, double *vec)
}
fft1->compute(work1, work1, -1);
// k->r FFT of Green's * electrolyte density = brick_psi
// k->r FFT of Green's * electrolyte density = u_brick
for (int i = 0, n = 0; i < nfft; i++) {
work2[n] = work1[n] * greensfn[i];
n++;
@ -610,18 +609,23 @@ void PPPMElectrode::compute_vector(bigint *imat, double *vec)
n++;
}
fft2->compute(work2, work2, 1);
vector<double> brick_psi(nz_pppm * ny_pppm * nx_pppm, 0.);
for (int k = nzlo_in, n = 0; k <= nzhi_in; k++)
for (int j = nylo_in; j <= nyhi_in; j++)
for (int i = nxlo_in; i <= nxhi_in; i++) {
brick_psi[ny_pppm * nx_pppm * k + nx_pppm * j + i] = work2[n];
u_brick[k][j][i] = work2[n];
n += 2;
}
MPI_Allreduce(MPI_IN_PLACE, &brick_psi.front(), nz_pppm * ny_pppm * nx_pppm, MPI_DOUBLE, MPI_SUM,
world);
gc->forward_comm(GridComm::KSPACE, this, 1, sizeof(FFT_SCALAR), FORWARD_AD, gc_buf1, gc_buf2,
MPI_FFT_SCALAR);
project_psi(imat, vec);
compute_vector_called = true;
}
// project brick_psi with weight matrix
void PPPMElectrode::project_psi(bigint *imat, double *vec)
{
// project u_brick with weight matrix
double **x = atom->x;
double const scaleinv = 1.0 / (nx_pppm * ny_pppm * nz_pppm);
for (int i = 0; i < atom->nlocal; i++) {
int ipos = imat[i];
if (ipos < 0) continue;
@ -639,28 +643,18 @@ void PPPMElectrode::compute_vector(bigint *imat, double *vec)
for (int ni = nlower; ni <= nupper; ni++) {
double iz0 = rho1d[2][ni];
int miz = ni + niz;
// int debug_miz = miz;
while (miz < 0) miz += nz_pppm;
miz = miz % nz_pppm;
for (int mi = nlower; mi <= nupper; mi++) {
double iy0 = iz0 * rho1d[1][mi];
int miy = mi + niy;
// int debug_miy = miy;
while (miy < 0) miy += ny_pppm;
miy = miy % ny_pppm;
for (int li = nlower; li <= nupper; li++) {
int mix = li + nix;
// int debug_mix = mix;
while (mix < 0) mix += nx_pppm;
mix = mix % nx_pppm;
double ix0 = iy0 * rho1d[0][li];
v += ix0 * brick_psi[ny_pppm * nx_pppm * miz + nx_pppm * miy + mix];
v += ix0 * u_brick[miz][miy][mix];
}
}
}
vec[ipos] += v * scaleinv;
}
compute_vector_called = true;
}
/* ----------------------------------------------------------------------
-------------------------------------------------------------------------
@ -921,6 +915,9 @@ void PPPMElectrode::allocate()
nxlo_out, nxhi_out, "pppm/electrode:electrolyte_density_brick");
memory->create(electrolyte_density_fft, nfft_both, "pppm/electrode:electrolyte_density_fft");
PPPM::allocate();
if (differentiation_flag != 1)
memory->create3d_offset(u_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
"pppm:u_brick");
}
/* ----------------------------------------------------------------------
@ -934,8 +931,8 @@ void PPPMElectrode::deallocate()
memory->destroy(electrolyte_density_fft);
memory->destroy3d_offset(density_brick, nzlo_out, nylo_out, nxlo_out);
memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out);
if (differentiation_flag == 1) {
memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out);
memory->destroy(sf_precoeff1);
memory->destroy(sf_precoeff2);
memory->destroy(sf_precoeff3);
@ -969,6 +966,37 @@ void PPPMElectrode::deallocate()
memory->destroy(gc_buf2);
}
void PPPMElectrode::allocate_peratom()
{
peratom_allocate_flag = 1;
memory->create3d_offset(v0_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
"pppm:v0_brick");
memory->create3d_offset(v1_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
"pppm:v1_brick");
memory->create3d_offset(v2_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
"pppm:v2_brick");
memory->create3d_offset(v3_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
"pppm:v3_brick");
memory->create3d_offset(v4_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
"pppm:v4_brick");
memory->create3d_offset(v5_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
"pppm:v5_brick");
// use same GC ghost grid object for peratom grid communication
// but need to reallocate a larger gc_buf1 and gc_buf2
if (differentiation_flag)
npergrid = 6;
else
npergrid = 7;
memory->destroy(gc_buf1);
memory->destroy(gc_buf2);
memory->create(gc_buf1, npergrid * ngc_buf1, "pppm:gc_buf1");
memory->create(gc_buf2, npergrid * ngc_buf2, "pppm:gc_buf2");
}
/* ----------------------------------------------------------------------
set global size of PPPM grid = nx,ny,nz_pppm
used for charge accumulation, FFTs, and electric field interpolation

View File

@ -80,6 +80,7 @@ class PPPMElectrode : public PPPM, public ElectrodeKSpace {
virtual void allocate();
virtual void deallocate();
virtual void allocate_peratom();
double compute_df_kspace();
// double estimate_ik_error(double, double, bigint);
virtual double compute_qopt();
@ -115,6 +116,7 @@ class PPPMElectrode : public PPPM, public ElectrodeKSpace {
int compute_step;
void start_compute();
void make_rho_in_brick(bigint *, FFT_SCALAR ***, bool);
void project_psi(bigint *, double *vec);
void one_step_multiplication(bigint *, std::vector<double>, double **, double **, int const);
void two_step_multiplication(bigint *, std::vector<double>, double **, double **, int const);
bool compute_vector_called;

View File

@ -80,6 +80,8 @@ PPPMElectrodeIntel::~PPPMElectrodeIntel()
{
memory->destroy3d_offset(electrolyte_density_brick, nzlo_out, nylo_out, nxlo_out);
memory->destroy(electrolyte_density_fft);
if ((differentiation_flag != 1) && !peratom_allocate_flag)
memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out);
delete boundcorr;
}
@ -158,8 +160,10 @@ void PPPMElectrodeIntel::compute(int eflag, int vflag)
{
#ifdef _LMP_INTEL_OFFLOAD
if (_use_base) {
PPPM::compute(eflag, vflag);
return;
error->all(FLERR, "Cannot use pppm/electrode/intel with offload");
// PPPM::compute(eflag, vflag);
// would work if the above line referred to PPPMElectrode
// but the required multiple inheritances would be insane
}
#endif
@ -343,7 +347,7 @@ void PPPMElectrodeIntel::compute_vector(bigint *imat, double *vec)
}
fft1->compute(work1, work1, -1);
// k->r FFT of Green's * electrolyte density = brick_psi
// k->r FFT of Green's * electrolyte density = u_brick
for (int i = 0, n = 0; i < nfft; i++) {
work2[n] = work1[n] * greensfn[i];
n++;
@ -351,35 +355,33 @@ void PPPMElectrodeIntel::compute_vector(bigint *imat, double *vec)
n++;
}
fft2->compute(work2, work2, 1);
int const brick_size = nz_pppm * ny_pppm * nx_pppm;
double *brick_psi = new double[brick_size];
memset(brick_psi, 0, brick_size * sizeof(double));
for (int k = nzlo_in, n = 0; k <= nzhi_in; k++)
for (int j = nylo_in; j <= nyhi_in; j++)
for (int i = nxlo_in; i <= nxhi_in; i++) {
brick_psi[ny_pppm * nx_pppm * k + nx_pppm * j + i] = work2[n];
u_brick[k][j][i] = work2[n];
n += 2;
}
MPI_Allreduce(MPI_IN_PLACE, &brick_psi[0], brick_size, MPI_DOUBLE, MPI_SUM, world);
gc->forward_comm(GridComm::KSPACE, this, 1, sizeof(FFT_SCALAR), FORWARD_AD, gc_buf1, gc_buf2,
MPI_FFT_SCALAR);
switch (fix->precision()) {
case FixIntel::PREC_MODE_MIXED:
project_psi<float, double>(fix->get_mixed_buffers(), imat, vec, brick_psi);
project_psi<float, double>(fix->get_mixed_buffers(), imat, vec);
break;
case FixIntel::PREC_MODE_DOUBLE:
project_psi<double, double>(fix->get_double_buffers(), imat, vec, brick_psi);
project_psi<double, double>(fix->get_double_buffers(), imat, vec);
break;
default:
project_psi<float, float>(fix->get_single_buffers(), imat, vec, brick_psi);
project_psi<float, float>(fix->get_single_buffers(), imat, vec);
}
compute_vector_called = true;
delete[] brick_psi;
}
// project brick_psi with weight matrix
// project u_brick with weight matrix
template <class flt_t, class acc_t, int use_table>
void PPPMElectrodeIntel::project_psi(IntelBuffers<flt_t, acc_t> *buffers, bigint *imat, double *vec,
double *brick_psi)
void PPPMElectrodeIntel::project_psi(IntelBuffers<flt_t, acc_t> *buffers, bigint *imat, double *vec)
{
ATOM_T *_noalias const x = buffers->get_x(0);
@ -472,27 +474,20 @@ void PPPMElectrodeIntel::project_psi(IntelBuffers<flt_t, acc_t> *buffers, bigint
#endif
for (int n = 0; n < order; n++) {
int miz = nlower + n + nz;
while (miz < 0) miz += nz_pppm;
miz = miz % nz_pppm;
FFT_SCALAR y0 = rho[2][n];
FFT_SCALAR z0 = rho[2][n];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#endif
for (int m = 0; m < order; m++) {
int miy = nlower + m + ny;
while (miy < 0) miy += ny_pppm;
miy = miy % ny_pppm;
FFT_SCALAR x0 = y0 * rho[1][m];
FFT_SCALAR y0 = z0 * rho[1][m];
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min(2), max(INTEL_P3M_ALIGNED_MAXORDER), avg(7)
#pragma simd
#endif
for (int l = 0; l < order; l++) {
int mix = nlower + l + nx;
while (mix < 0) mix += nx_pppm;
mix = mix % nx_pppm;
int mzyx = ny_pppm * nx_pppm * miz + nx_pppm * miy + mix;
v += x0 * rho[0][l] * brick_psi[mzyx];
v += y0 * rho[0][l] * u_brick[miz][miy][mix];
}
}
}
@ -1047,11 +1042,10 @@ void PPPMElectrodeIntel::allocate()
PPPMIntel::create3d_offset(density_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out,
nxhi_out, "pppm:density_brick");
if (differentiation_flag == 1) {
if ((differentiation_flag == 1) || u_brick != nullptr) {
memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out);
PPPMIntel::create3d_offset(u_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
"pppm:u_brick");
} else {
}
if (differentiation_flag != 1) {
memory->destroy3d_offset(vdx_brick, nzlo_out, nylo_out, nxlo_out);
memory->destroy3d_offset(vdy_brick, nzlo_out, nylo_out, nxlo_out);
memory->destroy3d_offset(vdz_brick, nzlo_out, nylo_out, nxlo_out);
@ -1062,5 +1056,89 @@ void PPPMElectrodeIntel::allocate()
PPPMIntel::create3d_offset(vdz_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out,
nxhi_out, "pppm:vdz_brick");
}
PPPMIntel::create3d_offset(u_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
"pppm:u_brick"); // use u_brick in project_psi
}
void PPPMElectrodeIntel::allocate_peratom()
{
// duplicated to avoid reallocating u_brick
peratom_allocate_flag = 1;
memory->create3d_offset(v0_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
"pppm:v0_brick");
memory->create3d_offset(v1_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
"pppm:v1_brick");
memory->create3d_offset(v2_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
"pppm:v2_brick");
memory->create3d_offset(v3_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
"pppm:v3_brick");
memory->create3d_offset(v4_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
"pppm:v4_brick");
memory->create3d_offset(v5_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
"pppm:v5_brick");
// use same GC ghost grid object for peratom grid communication
// but need to reallocate a larger gc_buf1 and gc_buf2
if (differentiation_flag)
npergrid = 6;
else
npergrid = 7;
memory->destroy(gc_buf1);
memory->destroy(gc_buf2);
memory->create(gc_buf1, npergrid * ngc_buf1, "pppm:gc_buf1");
memory->create(gc_buf2, npergrid * ngc_buf2, "pppm:gc_buf2");
}
void PPPMElectrodeIntel::deallocate()
{
// duplicated to always deallocate u_brick
memory->destroy3d_offset(density_brick, nzlo_out, nylo_out, nxlo_out);
memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out);
if (differentiation_flag == 1) {
memory->destroy(sf_precoeff1);
memory->destroy(sf_precoeff2);
memory->destroy(sf_precoeff3);
memory->destroy(sf_precoeff4);
memory->destroy(sf_precoeff5);
memory->destroy(sf_precoeff6);
} else {
memory->destroy3d_offset(vdx_brick, nzlo_out, nylo_out, nxlo_out);
memory->destroy3d_offset(vdy_brick, nzlo_out, nylo_out, nxlo_out);
memory->destroy3d_offset(vdz_brick, nzlo_out, nylo_out, nxlo_out);
}
memory->destroy(density_fft);
memory->destroy(greensfn);
memory->destroy(work1);
memory->destroy(work2);
memory->destroy(vg);
if (triclinic == 0) {
memory->destroy1d_offset(fkx, nxlo_fft);
memory->destroy1d_offset(fky, nylo_fft);
memory->destroy1d_offset(fkz, nzlo_fft);
} else {
memory->destroy(fkx);
memory->destroy(fky);
memory->destroy(fkz);
}
memory->destroy(gf_b);
if (stagger_flag) gf_b = nullptr;
memory->destroy2d_offset(rho1d, -order_allocated / 2);
memory->destroy2d_offset(drho1d, -order_allocated / 2);
memory->destroy2d_offset(rho_coeff, (1 - order_allocated) / 2);
memory->destroy2d_offset(drho_coeff, (1 - order_allocated) / 2);
delete fft1;
delete fft2;
delete remap;
delete gc;
memory->destroy(gc_buf1);
memory->destroy(gc_buf2);
}

View File

@ -41,70 +41,59 @@ namespace LAMMPS_NS {
class PPPMElectrodeIntel : public PPPMIntel, public ElectrodeKSpace {
public:
PPPMElectrodeIntel(class LAMMPS *);
virtual ~PPPMElectrodeIntel();
virtual void init();
virtual void setup();
virtual void compute(int, int);
~PPPMElectrodeIntel();
void init();
void setup();
void compute(int, int);
void compute_vector(bigint *, double *);
void compute_vector_corr(bigint *, double *);
void compute_matrix(bigint *, double **);
void compute_matrix_corr(bigint *, double **);
virtual void compute_group_group(int, int, int);
void compute_group_group(int, int, int);
protected:
FFT_SCALAR ***electrolyte_density_brick;
FFT_SCALAR *electrolyte_density_fft;
class BoundaryCorrection *boundcorr;
// virtual void set_grid_global();
// void set_grid_local();
virtual void allocate();
// virtual void deallocate();
// double compute_df_kspace();
void allocate();
void deallocate();
void allocate_peratom();
private:
int compute_step;
void start_compute();
template <class flt_t, class acc_t, int use_table>
void make_rho_in_brick(IntelBuffers<flt_t, acc_t> *buffers, bigint *,
FFT_SCALAR ***, bool);
void make_rho_in_brick(IntelBuffers<flt_t, acc_t> *buffers, bigint *, FFT_SCALAR ***, bool);
template <class flt_t, class acc_t>
void make_rho_in_brick(IntelBuffers<flt_t, acc_t> *buffers, bigint *imat,
FFT_SCALAR ***scratch_brick, bool which_particles) {
FFT_SCALAR ***scratch_brick, bool which_particles)
{
if (_use_table == 1)
make_rho_in_brick<flt_t, acc_t, 1>(buffers, imat, scratch_brick,
which_particles);
make_rho_in_brick<flt_t, acc_t, 1>(buffers, imat, scratch_brick, which_particles);
else
make_rho_in_brick<flt_t, acc_t, 0>(buffers, imat, scratch_brick,
which_particles);
make_rho_in_brick<flt_t, acc_t, 0>(buffers, imat, scratch_brick, which_particles);
}
template <class flt_t, class acc_t, int use_table>
void project_psi(IntelBuffers<flt_t, acc_t> *buffers, bigint *,
double *, double *);
void project_psi(IntelBuffers<flt_t, acc_t> *buffers, bigint *, double *);
template <class flt_t, class acc_t>
void project_psi(IntelBuffers<flt_t, acc_t> *buffers, bigint *imat,
double *vec, double *brick_psi) {
void project_psi(IntelBuffers<flt_t, acc_t> *buffers, bigint *imat, double *vec)
{
if (_use_table == 1)
project_psi<flt_t, acc_t, 1>(buffers, imat, vec,
brick_psi);
project_psi<flt_t, acc_t, 1>(buffers, imat, vec);
else
project_psi<flt_t, acc_t, 0>(buffers, imat, vec,
brick_psi);
project_psi<flt_t, acc_t, 0>(buffers, imat, vec);
}
void one_step_multiplication(bigint *, std::vector<double>,
double **, double **, int const);
void two_step_multiplication(bigint *, std::vector<double>,
double **, double **, int const);
void one_step_multiplication(bigint *, std::vector<double>, double **, double **, int const);
void two_step_multiplication(bigint *, std::vector<double>, double **, double **, int const);
bool compute_vector_called;
bigint *imat_cached;
};
} // namespace LAMMPS_NS
} // namespace LAMMPS_NS
#endif
#endif