From 1211af65a16f06d6ba99d202b783509c362f6e9b Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 15 Mar 2024 12:10:12 -0600 Subject: [PATCH 1/2] Fix Kokkos teamsize too large issue --- src/KOKKOS/pair_kokkos.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index 54502f290c..15417d7620 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -950,6 +950,8 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P static int vectorsize = 0; static int atoms_per_team = 0; + static int teamsize_max_for = 0; + static int teamsize_max_reduce = 0; #if defined(LMP_KOKKOS_GPU) static int lastcall = -1; @@ -966,7 +968,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P vectorsize = MIN(vectorsize,max_vectorsize); - int teamsize_max_for,teamsize_max_reduce; if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) { PairComputeFunctor ff(fpair,list); GetMaxTeamSize(ff, inum, teamsize_max_for, teamsize_max_reduce); @@ -974,12 +975,12 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P PairComputeFunctor ff(fpair,list); GetMaxTeamSize(ff, inum, teamsize_max_for, teamsize_max_reduce); } - - int teamsize_max = teamsize_max_for; - if (fpair->eflag || fpair->vflag) - teamsize_max = teamsize_max_reduce; - atoms_per_team = teamsize_max/vectorsize; } + + int teamsize_max = teamsize_max_for; + if (fpair->eflag || fpair->vflag) + teamsize_max = teamsize_max_reduce; + atoms_per_team = teamsize_max/vectorsize; #else vectorsize = 1; atoms_per_team = 1; From 6a28e8d5f6c2357dfe9ab8e68a99def27c69dd65 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Mon, 18 Mar 2024 13:27:21 -0500 Subject: [PATCH 2/2] Fixed bugs with sph gpu pair styles --- lib/gpu/lal_sph_heatconduction.cu | 22 ++++----- lib/gpu/lal_sph_lj.cu | 45 ++++++++--------- lib/gpu/lal_sph_taitwater.cu | 64 +++++++++++-------------- 
src/GPU/pair_sph_heatconduction_gpu.cpp | 4 +- src/GPU/pair_sph_lj_gpu.cpp | 28 +++++------ src/GPU/pair_sph_taitwater_gpu.cpp | 48 +++++++++++++------ 6 files changed, 112 insertions(+), 99 deletions(-) diff --git a/lib/gpu/lal_sph_heatconduction.cu b/lib/gpu/lal_sph_heatconduction.cu index e2ba40db0c..8e4ec6ff19 100644 --- a/lib/gpu/lal_sph_heatconduction.cu +++ b/lib/gpu/lal_sph_heatconduction.cu @@ -29,23 +29,23 @@ _texture_2d( vel_tex,int4); #if (SHUFFLE_AVAIL == 0) -#define store_dE(dEacc, ii, inum, tid, t_per_atom, offset, dE) \ +#define store_dE(dEacc, ii, inum, tid, t_per_atom, offset, i, dE) \ if (t_per_atom>1) { \ simdsync(); \ simd_reduce_add1(t_per_atom, red_acc, offset, tid, dEacc); \ } \ if (offset==0 && ii1) { \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ dEacc += shfl_down(dEacc, s, t_per_atom); \ } \ } \ if (offset==0 && ii1) { \ - simdsync(); \ - simd_reduce_add2(t_per_atom, red_acc, offset, tid, \ - drhoEacc.x, drhoEacc.y); \ - } \ - if (offset==0 && ii1) { \ + simdsync(); \ + simd_reduce_add2(t_per_atom, red_acc, offset, tid, \ + drhoEacc.x, drhoEacc.y); \ + } \ + if (offset==0 && ii1) { \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ drhoEacc.x += shfl_down(drhoEacc.x, s, t_per_atom); \ @@ -47,7 +48,8 @@ _texture_2d( vel_tex,int4); } \ } \ if (offset==0 && ii1) { \ - simdsync(); \ - simd_reduce_add2(t_per_atom, red_acc, offset, tid, \ - drhoEacc.x, drhoEacc.y); \ - } \ - if (offset==0 && ii1) { \ + simdsync(); \ + simd_reduce_add2(t_per_atom, red_acc, offset, tid, \ + drhoEacc.x, drhoEacc.y); \ + } \ + if (offset==0 && ii1) { \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ drhoEacc.x += shfl_down(drhoEacc.x, s, t_per_atom); \ @@ -47,7 +48,8 @@ _texture_2d( vel_tex,int4); } \ } \ if (offset==0 && iiago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh, - cpu_time, success, atom->v); + cpu_time, success, atom->vest); } else { 
inum = list->inum; ilist = list->ilist; @@ -122,7 +122,7 @@ void PairSPHHeatConductionGPU::compute(int eflag, int vflag) sph_heatconduction_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, - atom->tag, atom->v); + atom->tag, atom->vest); } if (!success) error->one(FLERR, "Insufficient memory on accelerator"); diff --git a/src/GPU/pair_sph_lj_gpu.cpp b/src/GPU/pair_sph_lj_gpu.cpp index 46d7b38073..d503a26335 100644 --- a/src/GPU/pair_sph_lj_gpu.cpp +++ b/src/GPU/pair_sph_lj_gpu.cpp @@ -114,7 +114,7 @@ void PairSPHLJGPU::compute(int eflag, int vflag) neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh, - cpu_time, success, atom->v); + cpu_time, success, atom->vest); } else { inum = list->inum; ilist = list->ilist; @@ -123,7 +123,7 @@ void PairSPHLJGPU::compute(int eflag, int vflag) sph_lj_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, - atom->tag, atom->v); + atom->tag, atom->vest); } if (!success) error->one(FLERR, "Insufficient memory on accelerator"); @@ -136,21 +136,21 @@ void PairSPHLJGPU::compute(int eflag, int vflag) int nlocal = atom->nlocal; if (acc_float) { auto drhoE_ptr = (float *)drhoE_pinned; - int idx = 0; - for (int i = 0; i < nlocal; i++) { - drho[i] = drhoE_ptr[idx]; - desph[i] = drhoE_ptr[idx+1]; - idx += 2; - } + for (int i = 0; i < nlocal; i++) + drho[i] += drhoE_ptr[i]; + + drhoE_ptr += nlocal; + for (int i = 0; i < nlocal; i++) + desph[i] += drhoE_ptr[i]; } else { auto drhoE_ptr = (double *)drhoE_pinned; - int idx = 0; - for (int i = 0; i < nlocal; i++) { - drho[i] = drhoE_ptr[idx]; - desph[i] = drhoE_ptr[idx+1]; - idx += 2; - } + for (int i = 0; i < nlocal; i++) + drho[i] += drhoE_ptr[i]; + + drhoE_ptr += 
nlocal; + for (int i = 0; i < nlocal; i++) + desph[i] += drhoE_ptr[i]; } if (atom->molecular != Atom::ATOMIC && neighbor->ago == 0) diff --git a/src/GPU/pair_sph_taitwater_gpu.cpp b/src/GPU/pair_sph_taitwater_gpu.cpp index 6f2762c144..23252cea8a 100644 --- a/src/GPU/pair_sph_taitwater_gpu.cpp +++ b/src/GPU/pair_sph_taitwater_gpu.cpp @@ -18,6 +18,7 @@ #include "pair_sph_taitwater_gpu.h" #include "atom.h" +#include "comm.h" #include "domain.h" #include "error.h" #include "force.h" @@ -85,6 +86,25 @@ void PairSPHTaitwaterGPU::compute(int eflag, int vflag) { ev_init(eflag, vflag); + // check consistency of pair coefficients + + if (first) { + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = 1; j <= atom->ntypes; j++) { + if (cutsq[i][j] > 1.e-32) { + if (!setflag[i][i] || !setflag[j][j]) { + if (comm->me == 0) { + printf( + "SPH particle types %d and %d interact with cutoff=%g, but not all of their single particle properties are set.\n", + i, j, sqrt(cutsq[i][j])); + } + } + } + } + } + first = 0; + } + int nall = atom->nlocal + atom->nghost; int inum, host_start; @@ -110,7 +130,7 @@ void PairSPHTaitwaterGPU::compute(int eflag, int vflag) firstneigh = sph_taitwater_gpu_compute_n( neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh, - cpu_time, success, atom->v); + cpu_time, success, atom->vest); } else { inum = list->inum; ilist = list->ilist; @@ -118,7 +138,7 @@ void PairSPHTaitwaterGPU::compute(int eflag, int vflag) firstneigh = list->firstneigh; sph_taitwater_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, - atom->tag, atom->v); + atom->tag, atom->vest); } if (!success) error->one(FLERR, "Insufficient memory on accelerator"); @@ -131,21 +151,21 @@ void PairSPHTaitwaterGPU::compute(int eflag, int vflag) int nlocal = atom->nlocal; if 
(acc_float) { auto drhoE_ptr = (float *)drhoE_pinned; - int idx = 0; - for (int i = 0; i < nlocal; i++) { - drho[i] = drhoE_ptr[idx]; - desph[i] = drhoE_ptr[idx+1]; - idx += 2; - } + for (int i = 0; i < nlocal; i++) + drho[i] += drhoE_ptr[i]; + + drhoE_ptr += nlocal; + for (int i = 0; i < nlocal; i++) + desph[i] += drhoE_ptr[i]; } else { auto drhoE_ptr = (double *)drhoE_pinned; - int idx = 0; - for (int i = 0; i < nlocal; i++) { - drho[i] = drhoE_ptr[idx]; - desph[i] = drhoE_ptr[idx+1]; - idx += 2; - } + for (int i = 0; i < nlocal; i++) + drho[i] += drhoE_ptr[i]; + + drhoE_ptr += nlocal; + for (int i = 0; i < nlocal; i++) + desph[i] += drhoE_ptr[i]; } if (atom->molecular != Atom::ATOMIC && neighbor->ago == 0)