From 8fef6a10dd7d97af95787bd48dc40bff4d2013b8 Mon Sep 17 00:00:00 2001 From: Vsevak Date: Fri, 18 Jun 2021 00:52:23 +0300 Subject: [PATCH 01/15] Fix atom types handling in the tip4p/gpu kernels --- lib/gpu/lal_lj_tip4p_long.cu | 38 +++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/lib/gpu/lal_lj_tip4p_long.cu b/lib/gpu/lal_lj_tip4p_long.cu index 782ae43662..ba552e9df8 100644 --- a/lib/gpu/lal_lj_tip4p_long.cu +++ b/lib/gpu/lal_lj_tip4p_long.cu @@ -140,7 +140,8 @@ __kernel void k_lj_tip4p_long_distrib(const __global numtyp4 *restrict x_, engv[inum*engv_iter + i] += vM.z * (acctyp)0.5 * alpha; } } - } else { + } + if (itype == typeO) { fM = ansO[i]; int iH1 = hneigh[i*4 ]; int iH2 = hneigh[i*4+1]; @@ -202,7 +203,8 @@ __kernel void k_lj_tip4p_reneigh(const __global numtyp4 *restrict x_, hneigh[i*4+1] = iH2; hneigh[i*4+2] = -1; } - } else { + } + if (itype == typeH) { if (hneigh[i*4+2] != -1) { int iI, iH; iI = atom_mapping(map,tag[i] - 1); @@ -301,12 +303,13 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_, int non_local_oxy = 0; int iH1, iH2, iO; - if(itype == typeO) { + if (itype == typeO) { iO = i; iH1 = hneigh[i*4 ]; iH2 = hneigh[i*4+1]; x1 = m[iO]; - } else { + } + if (itype == typeH) { iO = hneigh[i *4 ]; iH1 = hneigh[iO*4 ]; iH2 = hneigh[iO*4+1]; @@ -390,7 +393,8 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_, f.y += dely * force_coul; f.z += delz * force_coul; f.w += 0; - } else { + } + if (itype == typeO) { fO.x += delx * force_coul; fO.y += dely * force_coul; fO.z += delz * force_coul; @@ -412,7 +416,8 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_, virial[3] += delx*fd.y; virial[4] += delx*fd.z; virial[5] += dely*fd.z; - } else { + } + if (jtype == typeO) { numtyp cO = 1 - alpha, cH = 0.5*alpha; numtyp4 vdj; numtyp4 xjH1; fetch4(xjH1,jH1,pos_tex); @@ -429,7 +434,8 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_, virial[4] += (ix.x - vdj.x)*fd.z; virial[5] += (ix.y - vdj.y)*fd.z; } - } else { + } + if (itype == typeO) { numtyp cO = 1 - alpha, cH = 0.5*alpha; numtyp4 vdi, vdj; numtyp4 xH1; fetch4(xH1,iH1,pos_tex); @@ -439,7 +445,7 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_, vdi.y = xO.y*cO + xH1.y*cH + xH2.y*cH; vdi.z = xO.z*cO + xH1.z*cH + xH2.z*cH; //vdi.w = vdi.w; - if (jtype != typeH) { + if (jtype == typeO) { numtyp4 xjH1; fetch4(xjH1,jH1,pos_tex); numtyp4 xjH2; fetch4(xjH2,jH2,pos_tex); numtyp4 xjO; fetch4(xjO,jO,pos_tex); @@ -625,12 +631,13 @@ __kernel void k_lj_tip4p_long_fast(const __global numtyp4 *restrict x_, int non_local_oxy = 0; int iH1, iH2, iO; - if(itype == typeO) { + if (itype == typeO) { iO = i; iH1 = hneigh[i*4 ]; iH2 = hneigh[i*4+1]; x1 = m[iO]; - } else { + } + if (itype == typeH) { iO = hneigh[i *4 ]; iH1 = hneigh[iO*4 ]; iH2 = hneigh[iO*4+1]; @@ -714,7 +721,8 @@ __kernel void k_lj_tip4p_long_fast(const __global numtyp4 *restrict x_, f.y += dely * force_coul; f.z += delz * force_coul; f.w += 0; - } else { + } + if (itype == typeO) { fO.x += delx * force_coul; fO.y += dely * force_coul; fO.z += delz * force_coul; @@ -736,7 +744,8 @@ __kernel void k_lj_tip4p_long_fast(const __global numtyp4 *restrict x_, virial[3] += delx*fd.y; virial[4] += delx*fd.z; virial[5] += dely*fd.z; - } else { + } + if (jtype == typeO) { numtyp cO = 1 - alpha, cH = 0.5*alpha; numtyp4 vdj; numtyp4 xjH1; fetch4(xjH1,jH1,pos_tex); @@ -753,7 +762,8 @@ __kernel void k_lj_tip4p_long_fast(const __global numtyp4 *restrict x_, virial[4] += (ix.x - vdj.x)*fd.z; virial[5] += (ix.y - vdj.y)*fd.z; } - } else { + } + if (itype == typeO) { numtyp cO = 1 - alpha, cH = 0.5*alpha; numtyp4 vdi, vdj; numtyp4 xH1; fetch4(xH1,iH1,pos_tex); @@ -763,7 +773,7 @@ __kernel void k_lj_tip4p_long_fast(const __global numtyp4 *restrict x_, vdi.y = xO.y*cO + xH1.y*cH + xH2.y*cH; vdi.z = xO.z*cO + xH1.z*cH + xH2.z*cH; //vdi.w = vdi.w; - if (jtype != typeH) { + if (jtype == typeO) { numtyp4 xjH1; fetch4(xjH1,jH1,pos_tex); numtyp4 xjH2; fetch4(xjH2,jH2,pos_tex); numtyp4 xjO; fetch4(xjO,jO,pos_tex); From d982d153f870b412941ea50eda3771b7bcf75969 Mon Sep 17 00:00:00 2001 From: Vsevak Date: Fri, 18 Jun 2021 18:26:53 +0300 Subject: [PATCH 02/15] Fix conditions for correct results on other types --- lib/gpu/lal_lj_tip4p_long.cu | 128 +++++++++++++++++------------------ 1 file changed, 61 insertions(+), 67 deletions(-) diff --git a/lib/gpu/lal_lj_tip4p_long.cu b/lib/gpu/lal_lj_tip4p_long.cu index ba552e9df8..026f08b3f9 100644 --- a/lib/gpu/lal_lj_tip4p_long.cu +++ b/lib/gpu/lal_lj_tip4p_long.cu @@ -388,17 +388,16 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_, prefactor *= qqrd2e*qtmp/r; numtyp force_coul = r2inv*prefactor * (_erfc + EWALD_F*grij*expm2 - factor_coul); - if (itype == typeH) { - f.x += delx * force_coul; - f.y += dely * force_coul; - f.z += delz * force_coul; - f.w += 0; - } if (itype == typeO) { fO.x += delx * force_coul; fO.y += dely * force_coul; fO.z += delz * force_coul; fO.w += 0; + } else { + f.x += delx * force_coul; + f.y += dely * force_coul; + f.z += delz * force_coul; + f.w += 0; } if (eflag>0) { e_coul += prefactor*(_erfc-factor_coul); @@ -408,33 +407,6 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_, fd.x = delx*force_coul; fd.y = dely*force_coul; fd.z = delz*force_coul; - if (itype == typeH) { - if (jtype == typeH) { - virial[0] += delx*fd.x; - virial[1] += dely*fd.y; - virial[2] += delz*fd.z; - virial[3] += delx*fd.y; - virial[4] += delx*fd.z; - virial[5] += dely*fd.z; - } - if (jtype == typeO) { - numtyp cO = 1 - alpha, cH = 0.5*alpha; - numtyp4 vdj; - numtyp4 xjH1; fetch4(xjH1,jH1,pos_tex); - numtyp4 xjH2; fetch4(xjH2,jH2,pos_tex); - numtyp4 xjO; fetch4(xjO,jO,pos_tex); - vdj.x = xjO.x*cO + xjH1.x*cH + xjH2.x*cH; - vdj.y = xjO.y*cO + xjH1.y*cH + xjH2.y*cH; - vdj.z = xjO.z*cO + xjH1.z*cH + xjH2.z*cH; - //vdj.w = vdj.w; - virial[0] += (ix.x - vdj.x)*fd.x; - virial[1] += (ix.y - vdj.y)*fd.y; - virial[2] += (ix.z - vdj.z)*fd.z; - virial[3] += (ix.x - vdj.x)*fd.y; - virial[4] += (ix.x - vdj.x)*fd.z; - virial[5] += (ix.y - vdj.y)*fd.z; - } - } if (itype == typeO) { numtyp cO = 1 - alpha, cH = 0.5*alpha; numtyp4 vdi, vdj; @@ -460,6 +432,31 @@ __kernel void k_lj_tip4p_long(const __global numtyp4 *restrict x_, vO[3] += 0.5*(vdi.x - vdj.x)*fd.y; vO[4] += 0.5*(vdi.x - vdj.x)*fd.z; vO[5] += 0.5*(vdi.y - vdj.y)*fd.z; + } else { + if (jtype == typeO) { + numtyp cO = 1 - alpha, cH = 0.5*alpha; + numtyp4 vdj; + numtyp4 xjH1; fetch4(xjH1,jH1,pos_tex); + numtyp4 xjH2; fetch4(xjH2,jH2,pos_tex); + numtyp4 xjO; fetch4(xjO,jO,pos_tex); + vdj.x = xjO.x*cO + xjH1.x*cH + xjH2.x*cH; + vdj.y = xjO.y*cO + xjH1.y*cH + xjH2.y*cH; + vdj.z = xjO.z*cO + xjH1.z*cH + xjH2.z*cH; + //vdj.w = vdj.w; + virial[0] += (ix.x - vdj.x)*fd.x; + virial[1] += (ix.y - vdj.y)*fd.y; + virial[2] += (ix.z - vdj.z)*fd.z; + virial[3] += (ix.x - vdj.x)*fd.y; + virial[4] += (ix.x - vdj.x)*fd.z; + virial[5] += (ix.y - vdj.y)*fd.z; + } else { + virial[0] += delx*fd.x; + virial[1] += dely*fd.y; + virial[2] += delz*fd.z; + virial[3] += delx*fd.y; + virial[4] += delx*fd.z; + virial[5] += dely*fd.z; + } } } } @@ -617,7 +614,7 @@ __kernel void k_lj_tip4p_long_fast(const __global numtyp4 *restrict x_, } __syncthreads(); - if (ii0) { e_coul += prefactor*(_erfc-factor_coul); @@ -736,33 +732,6 @@ __kernel void k_lj_tip4p_long_fast(const __global numtyp4 *restrict x_, fd.x = delx*force_coul; fd.y = dely*force_coul; fd.z = delz*force_coul; - if (itype == typeH) { - if (jtype == typeH) { - virial[0] += delx*fd.x; - virial[1] += dely*fd.y; - virial[2] += delz*fd.z; - virial[3] += delx*fd.y; - virial[4] += delx*fd.z; - virial[5] += dely*fd.z; - } - if (jtype == typeO) { - numtyp cO = 1 - alpha, cH = 0.5*alpha; - numtyp4 vdj; - numtyp4 xjH1; fetch4(xjH1,jH1,pos_tex); - numtyp4 xjH2; fetch4(xjH2,jH2,pos_tex); - numtyp4 xjO; fetch4(xjO,jO,pos_tex); - vdj.x = xjO.x*cO + xjH1.x*cH + xjH2.x*cH; - vdj.y = xjO.y*cO + xjH1.y*cH + xjH2.y*cH; - vdj.z = xjO.z*cO + xjH1.z*cH + xjH2.z*cH; - //vdj.w = vdj.w; - virial[0] += (ix.x - vdj.x)*fd.x; - virial[1] += (ix.y - vdj.y)*fd.y; - virial[2] += (ix.z - vdj.z)*fd.z; - virial[3] += (ix.x - vdj.x)*fd.y; - virial[4] += (ix.x - vdj.x)*fd.z; - virial[5] += (ix.y - vdj.y)*fd.z; - } - } if (itype == typeO) { numtyp cO = 1 - alpha, cH = 0.5*alpha; numtyp4 vdi, vdj; @@ -788,6 +757,31 @@ __kernel void k_lj_tip4p_long_fast(const __global numtyp4 *restrict x_, vO[3] += 0.5*(vdi.x - vdj.x)*fd.y; vO[4] += 0.5*(vdi.x - vdj.x)*fd.z; vO[5] += 0.5*(vdi.y - vdj.y)*fd.z; + } else { + if (jtype == typeO) { + numtyp cO = 1 - alpha, cH = 0.5*alpha; + numtyp4 vdj; + numtyp4 xjH1; fetch4(xjH1,jH1,pos_tex); + numtyp4 xjH2; fetch4(xjH2,jH2,pos_tex); + numtyp4 xjO; fetch4(xjO,jO,pos_tex); + vdj.x = xjO.x*cO + xjH1.x*cH + xjH2.x*cH; + vdj.y = xjO.y*cO + xjH1.y*cH + xjH2.y*cH; + vdj.z = xjO.z*cO + xjH1.z*cH + xjH2.z*cH; + //vdj.w = vdj.w; + virial[0] += (ix.x - vdj.x)*fd.x; + virial[1] += (ix.y - vdj.y)*fd.y; + virial[2] += (ix.z - vdj.z)*fd.z; + virial[3] += (ix.x - vdj.x)*fd.y; + virial[4] += (ix.x - vdj.x)*fd.z; + virial[5] += (ix.y - vdj.y)*fd.z; + } else { + virial[0] += delx*fd.x; + virial[1] += dely*fd.y; + virial[2] += delz*fd.z; + virial[3] += delx*fd.y; + virial[4] += delx*fd.z; + virial[5] += dely*fd.z; + } } } } From cfd9cf625dd90697d9b7e40623f123d032683690 Mon Sep 17 00:00:00 2001 From: Emily Kahl Date: Fri, 25 Jun 2021 09:47:54 +1000 Subject: [PATCH 03/15] Initial draft of Kokkos acclerated PPPM routines for triclinic cells. --- src/KOKKOS/pppm_kokkos.cpp | 553 +++++++++++++++++++++++-------------- src/KOKKOS/pppm_kokkos.h | 41 ++- 2 files changed, 381 insertions(+), 213 deletions(-) diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index dbda29d807..98b6492e48 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -1,7 +1,6 @@ -// clang-format off /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://www.lammps.org/, Sandia National Laboratories + https://lammps.sandia.gov/, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract @@ -31,6 +30,7 @@ #include "memory_kokkos.h" #include "pair.h" #include "remap_kokkos.h" +#include "kokkos_few.h" #include @@ -67,7 +67,7 @@ PPPMKokkos::PPPMKokkos(LAMMPS *lmp) : PPPM(lmp) pppmflag = 1; group_group_enable = 0; - triclinic_support = 0; + triclinic_support = 1; nfactors = 3; //factors = new int[nfactors]; @@ -455,80 +455,92 @@ void PPPMKokkos::operator()(TagPPPM_setup4, const int &n) const template void PPPMKokkos::setup_triclinic() { -// int i,j,k,n; -// double *prd; -// -// // volume-dependent factors -// // adjust z dimension for 2d slab PPPM -// // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 -// -// prd = domain->prd; -// -// double xprd = prd[0]; -// double yprd = prd[1]; -// double zprd = prd[2]; -// double zprd_slab = zprd*slab_volfactor; -// volume = xprd * yprd * zprd_slab; -// -// // use lamda (0-1) coordinates -// -// delxinv = nx_pppm; -// delyinv = ny_pppm; -// delzinv = nz_pppm; -// delvolinv = delxinv*delyinv*delzinv/volume; -// -// // d_fkx,d_fky,d_fkz for my FFT grid pts -// -// double per_i,per_j,per_k; -// -// n = 0; -// for (k = nzlo_fft; k <= nzhi_fft; k++) { // parallel_for -// per_k = k - nz_pppm*(2*k/nz_pppm); -// for (j = nylo_fft; j <= nyhi_fft; j++) { -// per_j = j - ny_pppm*(2*j/ny_pppm); -// for (i = nxlo_fft; i <= nxhi_fft; i++) { -// per_i = i - nx_pppm*(2*i/nx_pppm); -// -// double unitk_lamda[3]; -// unitk_lamda[0] = 2.0*MY_PI*per_i; -// unitk_lamda[1] = 2.0*MY_PI*per_j; -// unitk_lamda[2] = 2.0*MY_PI*per_k; -// x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); -// d_fkx[n] = unitk_lamda[0]; -// d_fky[n] = unitk_lamda[1]; -// d_fkz[n] = unitk_lamda[2]; -// n++; -// } -// } -// } -// -// // virial coefficients -// -// double sqk,vterm; -// -// for (n = 0; n < nfft; n++) { // parallel_for -// sqk = d_fkx[n]*d_fkx[n] + d_fky[n]*d_fky[n] + d_fkz[n]*d_fkz[n]; -// if (sqk == 0.0) { -// d_vg(n,0) = 0.0; -// d_vg(n,1) = 0.0; -// d_vg(n,2) = 0.0; -// d_vg(n,3) = 0.0; -// d_vg(n,4) = 0.0; -// d_vg(n,5) = 0.0; -// } else { -// vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); -// d_vg(n,0) = 1.0 + vterm*d_fkx[n]*d_fkx[n]; -// d_vg(n,1) = 1.0 + vterm*d_fky[n]*d_fky[n]; -// d_vg(n,2) = 1.0 + vterm*d_fkz[n]*d_fkz[n]; -// d_vg(n,3) = vterm*d_fkx[n]*d_fky[n]; -// d_vg(n,4) = vterm*d_fkx[n]*d_fkz[n]; -// d_vg(n,5) = vterm*d_fky[n]*d_fkz[n]; -// } -// } -// -// compute_gf_ik_triclinic(); + int i,j,k,n; + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + prd = domain->prd; + // Update simulation box parameters + h = Few(domain->h); + h_inv = Few(domain->h_inv); + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + // use lamda (0-1) coordinates + + delxinv = nx_pppm; + delyinv = ny_pppm; + delzinv = nz_pppm; + delvolinv = delxinv*delyinv*delzinv/volume; + + // merge three outer loops into one for better threading + numz_fft = nzhi_fft-nzlo_fft + 1; + numy_fft = nyhi_fft-nylo_fft + 1; + numx_fft = nxhi_fft-nxlo_fft + 1; + const int inum_fft = numx_fft*numy_fft*numz_fft; + copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0,inum_fft),*this); + copymode = 0; + + // virial coefficients + + copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0,nfft),*this); + copymode = 0; + + compute_gf_ik_triclinic(); } +template +KOKKOS_INLINE_FUNCTION +void PPPMKokkos::operator()(TagPPPM_setup_triclinic1, const int &n) const +{ + const int k = n/(numy_fft*numx_fft); + const int j = (n - k*numy_fft*numx_fft) / numx_fft; + const int i = n - k*numy_fft*numx_fft - j*numx_fft; + double per_k = k - nz_pppm*(2*k/nz_pppm); + double per_j = j - ny_pppm*(2*j/ny_pppm); + double per_i = i - nx_pppm*(2*i/nx_pppm); + + double unitk_lamda[3]; + unitk_lamda[0] = 2.0*MY_PI*per_i; + unitk_lamda[1] = 2.0*MY_PI*per_j; + unitk_lamda[2] = 2.0*MY_PI*per_k; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + d_fkx[n] = unitk_lamda[0]; + d_fky[n] = unitk_lamda[1]; + d_fkz[n] = unitk_lamda[2]; +} + +template +KOKKOS_INLINE_FUNCTION +void PPPMKokkos::operator()(TagPPPM_setup_triclinic2, const int &n) const +{ + const double sqk = d_fkx[n]*d_fkx[n] + d_fky[n]*d_fky[n] + d_fkz[n]*d_fkz[n]; + if (sqk == 0.0) { + d_vg(n,0) = 0.0; + d_vg(n,1) = 0.0; + d_vg(n,2) = 0.0; + d_vg(n,3) = 0.0; + d_vg(n,4) = 0.0; + d_vg(n,5) = 0.0; + } else { + const double vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); + d_vg(n,0) = 1.0 + vterm*d_fkx[n]*d_fkx[n]; + d_vg(n,1) = 1.0 + vterm*d_fky[n]*d_fky[n]; + d_vg(n,2) = 1.0 + vterm*d_fkz[n]*d_fkz[n]; + d_vg(n,3) = vterm*d_fkx[n]*d_fky[n]; + d_vg(n,4) = vterm*d_fkx[n]*d_fkz[n]; + d_vg(n,5) = vterm*d_fky[n]*d_fkz[n]; + } +} /* ---------------------------------------------------------------------- reset local grid arrays and communication stencils called by fix balance b/c it changed sizes of processor sub-domains @@ -1004,7 +1016,7 @@ void PPPMKokkos::set_grid_global() tmp[0] = nx_pppm; tmp[1] = ny_pppm; tmp[2] = nz_pppm; - x2lamdaT(&tmp[0],&tmp[0]); + KSpace::x2lamdaT(&tmp[0],&tmp[0]); h_x = 1.0/tmp[0]; h_y = 1.0/tmp[1]; h_z = 1.0/tmp[2]; @@ -1311,6 +1323,10 @@ void PPPMKokkos::compute_gf_ik_triclinic() nby = static_cast (tmp[1]); nbz = static_cast (tmp[2]); + // Update the local copy of the domain box tilt + h = Few(domain->h); + h_inv = Few(domain->h_inv); + twoorder = 2*order; copymode = 1; @@ -1320,71 +1336,71 @@ void PPPMKokkos::compute_gf_ik_triclinic() template KOKKOS_INLINE_FUNCTION -void PPPMKokkos::operator()(TagPPPM_compute_gf_ik_triclinic, const int &/*m*/) const +void PPPMKokkos::operator()(TagPPPM_compute_gf_ik_triclinic, const int &m) const { - //int n = (m - nzlo_fft)*(nyhi_fft+1 - nylo_fft)*(nxhi_fft+1 - nxlo_fft); - // - //const int mper = m - nz_pppm*(2*m/nz_pppm); - //const double snz = square(sin(MY_PI*mper/nz_pppm)); - // - //for (int l = nylo_fft; l <= nyhi_fft; l++) { - // const int lper = l - ny_pppm*(2*l/ny_pppm); - // const double sny = square(sin(MY_PI*lper/ny_pppm)); - // - // for (int k = nxlo_fft; k <= nxhi_fft; k++) { - // const int kper = k - nx_pppm*(2*k/nx_pppm); - // const double snx = square(sin(MY_PI*kper/nx_pppm)); - // - // double unitk_lamda[3]; - // unitk_lamda[0] = 2.0*MY_PI*kper; - // unitk_lamda[1] = 2.0*MY_PI*lper; - // unitk_lamda[2] = 2.0*MY_PI*mper; - // x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - // - // const double sqk = square(unitk_lamda[0]) + square(unitk_lamda[1]) + square(unitk_lamda[2]); - // - // if (sqk != 0.0) { - // const double numerator = 12.5663706/sqk; - // const double denominator = gf_denom(snx,sny,snz); - // double sum1 = 0.0; - // - // for (int nx = -nbx; nx <= nbx; nx++) { - // const double argx = MY_PI*kper/nx_pppm + MY_PI*nx; - // const double wx = powsinxx(argx,twoorder); - // - // for (int ny = -nby; ny <= nby; ny++) { - // const double argy = MY_PI*lper/ny_pppm + MY_PI*ny; - // const double wy = powsinxx(argy,twoorder); - // - // for (int nz = -nbz; nz <= nbz; nz++) { - // const double argz = MY_PI*mper/nz_pppm + MY_PI*nz; - // const double wz = powsinxx(argz,twoorder); - // - // double b[3]; - // b[0] = 2.0*MY_PI*nx_pppm*nx; - // b[1] = 2.0*MY_PI*ny_pppm*ny; - // b[2] = 2.0*MY_PI*nz_pppm*nz; - // x2lamdaT(&b[0],&b[0]); - // - // const double qx = unitk_lamda[0]+b[0]; - // const double sx = exp(-0.25*square(qx/g_ewald)); - // - // const double qy = unitk_lamda[1]+b[1]; - // const double sy = exp(-0.25*square(qy/g_ewald)); - // - // const double qz = unitk_lamda[2]+b[2]; - // const double sz = exp(-0.25*square(qz/g_ewald)); - // - // const double dot1 = unitk_lamda[0]*qx + unitk_lamda[1]*qy + unitk_lamda[2]*qz; - // const double dot2 = qx*qx+qy*qy+qz*qz; - // sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; - // } - // } - // } - // d_greensfn[n++] = numerator*sum1/denominator; - // } else d_greensfn[n++] = 0.0; - // } - //} + int n = (m - nzlo_fft)*(nyhi_fft+1 - nylo_fft)*(nxhi_fft+1 - nxlo_fft); + + const int mper = m - nz_pppm*(2*m/nz_pppm); + const double snz = square(sin(MY_PI*mper/nz_pppm)); + + for (int l = nylo_fft; l <= nyhi_fft; l++) { + const int lper = l - ny_pppm*(2*l/ny_pppm); + const double sny = square(sin(MY_PI*lper/ny_pppm)); + + for (int k = nxlo_fft; k <= nxhi_fft; k++) { + const int kper = k - nx_pppm*(2*k/nx_pppm); + const double snx = square(sin(MY_PI*kper/nx_pppm)); + + double unitk_lamda[3]; + unitk_lamda[0] = 2.0*MY_PI*kper; + unitk_lamda[1] = 2.0*MY_PI*lper; + unitk_lamda[2] = 2.0*MY_PI*mper; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + + const double sqk = square(unitk_lamda[0]) + square(unitk_lamda[1]) + square(unitk_lamda[2]); + + if (sqk != 0.0) { + const double numerator = 12.5663706/sqk; + const double denominator = gf_denom(snx,sny,snz); + double sum1 = 0.0; + + for (int nx = -nbx; nx <= nbx; nx++) { + const double argx = MY_PI*kper/nx_pppm + MY_PI*nx; + const double wx = powsinxx(argx,twoorder); + + for (int ny = -nby; ny <= nby; ny++) { + const double argy = MY_PI*lper/ny_pppm + MY_PI*ny; + const double wy = powsinxx(argy,twoorder); + + for (int nz = -nbz; nz <= nbz; nz++) { + const double argz = MY_PI*mper/nz_pppm + MY_PI*nz; + const double wz = powsinxx(argz,twoorder); + + double b[3]; + b[0] = 2.0*MY_PI*nx_pppm*nx; + b[1] = 2.0*MY_PI*ny_pppm*ny; + b[2] = 2.0*MY_PI*nz_pppm*nz; + x2lamdaT(&b[0],&b[0]); + + const double qx = unitk_lamda[0]+b[0]; + const double sx = exp(-0.25*square(qx/g_ewald)); + + const double qy = unitk_lamda[1]+b[1]; + const double sy = exp(-0.25*square(qy/g_ewald)); + + const double qz = unitk_lamda[2]+b[2]; + const double sz = exp(-0.25*square(qz/g_ewald)); + + const double dot1 = unitk_lamda[0]*qx + unitk_lamda[1]*qy + unitk_lamda[2]*qz; + const double dot2 = qx*qx+qy*qy+qz*qz; + sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; + } + } + } + d_greensfn[n++] = numerator*sum1/denominator; + } else d_greensfn[n++] = 0.0; + } + } } /* ---------------------------------------------------------------------- @@ -1867,107 +1883,224 @@ void PPPMKokkos::operator()(TagPPPM_poisson_ik10, const int &ii) con template void PPPMKokkos::poisson_ik_triclinic() { -// int i,j,k,n; -// -// // compute gradients of V(r) in each of 3 dims by transforming ik*V(k) -// // FFT leaves data in 3d brick decomposition -// // copy it into inner portion of vdx,vdy,vdz arrays -// -// // x direction gradient -// -// n = 0; -// for (i = 0; i < nfft; i++) { // parallel_for1 -// d_work2[n] = -d_fkx[i]*d_work1[n+1]; -// d_work2[n+1] = d_fkx[i]*d_work1[n]; -// n += 2; -// } -// -// fft2->compute(d_work2,d_work2,FFT3dKokkos::BACKWARD); -// -// n = 0; -// for (k = nzlo_in-nzlo_out; k <= nzhi_in-nzlo_out; k++) // parallel_for2 -// -// -// // y direction gradient -// -// n = 0; -// for (i = 0; i < nfft; i++) { // parallel_for3 -// d_work2[n] = -d_fky[i]*d_work1[n+1]; -// d_work2[n+1] = d_fky[i]*d_work1[n]; -// n += 2; -// } -// -// fft2->compute(d_work2,d_work2,FFT3dKokkos::BACKWARD); -// -// n = 0; -// for (k = nzlo_in-nzlo_out; k <= nzhi_in-nzlo_out; k++) // parallel_for4 -// for (j = nylo_in-nylo_out; j <= nyhi_in-nylo_out; j++) -// for (i = nxlo_in-nxlo_out; i <= nxhi_in-nxlo_out; i++) { -// d_vdy_brick(k,j,i) = d_work2[n]; -// n += 2; -// } -// -// // z direction gradient -// -// n = 0; -// for (i = 0; i < nfft; i++) { // parallel_for5 -// d_work2[n] = -d_fkz[i]*d_work1[n+1]; -// d_work2[n+1] = d_fkz[i]*d_work1[n]; -// n += 2; -// } -// -// fft2->compute(d_work2,d_work2,FFT3dKokkos::BACKWARD); -// -// n = 0; -// for (k = nzlo_in-nzlo_out; k <= nzhi_in-nzlo_out; k++) // parallel_for6 -// +/************************************************** + int i,j,k,n; + + // compute gradients of V(r) in each of 3 dims by transforming ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + n = 0; + for (i = 0; i < nfft; i++) { // parallel_for1 + d_work2[n] = -d_fkx[i]*d_work1[n+1]; + d_work2[n+1] = d_fkx[i]*d_work1[n]; + n += 2; + } + + fft2->compute(d_work2,d_work2,FFT3dKokkos::BACKWARD); + + n = 0; + for (k = nzlo_in-nzlo_out; k <= nzhi_in-nzlo_out; k++) // parallel_for2 + + + // y direction gradient + + n = 0; + for (i = 0; i < nfft; i++) { // parallel_for3 + d_work2[n] = -d_fky[i]*d_work1[n+1]; + d_work2[n+1] = d_fky[i]*d_work1[n]; + n += 2; + } + + fft2->compute(d_work2,d_work2,FFT3dKokkos::BACKWARD); + + n = 0; + for (k = nzlo_in-nzlo_out; k <= nzhi_in-nzlo_out; k++) // parallel_for4 + for (j = nylo_in-nylo_out; j <= nyhi_in-nylo_out; j++) + for (i = nxlo_in-nxlo_out; i <= nxhi_in-nxlo_out; i++) { + d_vdy_brick(k,j,i) = d_work2[n]; + n += 2; + } + + // z direction gradient + + n = 0; + for (i = 0; i < nfft; i++) { // parallel_for5 + d_work2[n] = -d_fkz[i]*d_work1[n+1]; + d_work2[n+1] = d_fkz[i]*d_work1[n]; + n += 2; + } + + fft2->compute(d_work2,d_work2,FFT3dKokkos::BACKWARD); + + n = 0; + for (k = nzlo_in-nzlo_out; k <= nzhi_in-nzlo_out; k++) // parallel_for6 +******************************/ + int i,j,k,n; + + // compute gradients of V(r) in each of 3 dims by transforming ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + //n = 0; + //for (i = 0; i < nfft; i++) { + // d_work2[n] = -d_fkx[i]*d_work1[n+1]; + // d_work2[n+1] = d_fkx[i]*d_work1[n]; + // n += 2; + //} + + // merge three outer loops into one for better threading + + numz_fft = nzhi_fft-nzlo_fft + 1; + numy_fft = nyhi_fft-nylo_fft + 1; + numx_fft = nxhi_fft-nxlo_fft + 1; + const int inum_fft = numz_fft*numy_fft*numx_fft; + + numz_inout = (nzhi_in-nzlo_out)-(nzlo_in-nzlo_out) + 1; + numy_inout = (nyhi_in-nylo_out)-(nylo_in-nylo_out) + 1; + numx_inout = (nxhi_in-nxlo_out)-(nxlo_in-nxlo_out) + 1; + const int inum_inout = numz_inout*numy_inout*numx_inout; + + copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0,nfft),*this); + copymode = 0; + + fft2->compute(d_work2,d_work2,FFT3dKokkos::BACKWARD); + + //n = 0; + //for (k = nzlo_in; k <= nzhi_in; k++) + // for (j = nylo_in; j <= nyhi_in; j++) + // for (i = nxlo_in; i <= nxhi_in; i++) { + // d_vdx_brick(k,j,i) = d_work2[n]; + // n += 2; + // } + + copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0,inum_inout),*this); + copymode = 0; + + // y direction gradient + + //n = 0; + //for (i = 0; i < nfft; i++) { + // d_work2[n] = -d_fky[i]*d_work1[n+1]; + // d_work2[n+1] = d_fky[i]*d_work1[n]; + // n += 2; + //} + + copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0,nfft),*this); + copymode = 0; + + fft2->compute(d_work2,d_work2,FFT3dKokkos::BACKWARD); + + //n = 0; + //for (k = nzlo_in; k <= nzhi_in; k++) + // for (j = nylo_in; j <= nyhi_in; j++) + // for (i = nxlo_in; i <= nxhi_in; i++) { + // d_vdy_brick(k,j,i) = d_work2[n]; + // n += 2; + // } + + copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0,inum_inout),*this); + copymode = 0; + + // z direction gradient + + //n = 0; + //for (i = 0; i < nfft; i++) { + // d_work2[n] = -d_fkz[i]*d_work1[n+1]; + // d_work2[n+1] = d_fkz[i]*d_work1[n]; + // n += 2; + //} + + copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0,nfft),*this); + copymode = 0; + + fft2->compute(d_work2,d_work2,FFT3dKokkos::BACKWARD); + + //n = 0; + //for (k = nzlo_in; k <= nzhi_in; k++) + // for (j = nylo_in; j <= nyhi_in; j++) + // for (i = nxlo_in; i <= nxhi_in; i++) { + // d_vdz_brick(k,j,i) = d_work2[n]; + // n += 2; + // } + + copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0,inum_inout),*this); + copymode = 0; + } template KOKKOS_INLINE_FUNCTION -void PPPMKokkos::operator()(TagPPPM_poisson_ik_triclinic1, const int &/*k*/) const +void PPPMKokkos::operator()(TagPPPM_poisson_ik_triclinic1, const int &ii) const { - + d_work2[2*ii] = -d_fkx[ii]*d_work1[2*ii+1]; + d_work2[2*ii+1] = d_fkx[ii]*d_work1[2*ii]; } template KOKKOS_INLINE_FUNCTION -void PPPMKokkos::operator()(TagPPPM_poisson_ik_triclinic2, const int &/*k*/) const +void PPPMKokkos::operator()(TagPPPM_poisson_ik_triclinic2, const int &ii) const { -// for (j = nylo_in-nylo_out; j <= nyhi_in-nylo_out; j++) -// for (i = nxlo_in-nxlo_out; i <= nxhi_in-nxlo_out; i++) { -// d_vdx_brick(k,j,i) = d_work2[n]; -// n += 2; -// } + const int n = ii*2; + int k = ii/(numy_inout*numx_inout); + int j = (ii - k*numy_inout*numx_inout) / numx_inout; + int i = ii - k*numy_inout*numx_inout - j*numx_inout; + k += nzlo_in-nzlo_out; + j += nylo_in-nylo_out; + i += nxlo_in-nxlo_out; + d_vdx_brick(k,j,i) = d_work2[n]; } template KOKKOS_INLINE_FUNCTION -void PPPMKokkos::operator()(TagPPPM_poisson_ik_triclinic3, const int &/*k*/) const +void PPPMKokkos::operator()(TagPPPM_poisson_ik_triclinic3, const int &ii) const { // int n = (k - (nzlo_in-nzlo_out))*((nyhi_in-nylo_out) - (nylo_in-nylo_out) + 1)*((nxhi_in-nxlo_out) - (nxlo_in-nxlo_out) + 1)*2; + d_work2[2*ii] = -d_fky[ii]*d_work1[2*ii+1]; + d_work2[2*ii+1] = d_fky[ii]*d_work1[2*ii]; } template KOKKOS_INLINE_FUNCTION -void PPPMKokkos::operator()(TagPPPM_poisson_ik_triclinic4, const int &/*k*/) const +void PPPMKokkos::operator()(TagPPPM_poisson_ik_triclinic4, const int &ii) const { // int n = (k - (nzlo_in-nzlo_out))*((nyhi_in-nylo_out) - (nylo_in-nylo_out) + 1)*((nxhi_in-nxlo_out) - (nxlo_in-nxlo_out) + 1)*2; // + + const int n = ii*2; + int k = ii/(numy_inout*numx_inout); + int j = (ii - k*numy_inout*numx_inout) / numx_inout; + int i = ii - k*numy_inout*numx_inout - j*numx_inout; + k += nzlo_in-nzlo_out; + j += nylo_in-nylo_out; + i += nxlo_in-nxlo_out; + d_vdy_brick(k,j,i) = d_work2[n]; } template KOKKOS_INLINE_FUNCTION -void PPPMKokkos::operator()(TagPPPM_poisson_ik_triclinic5, const int &/*k*/) const +void PPPMKokkos::operator()(TagPPPM_poisson_ik_triclinic5, const int &ii) const { // int n = (k - (nzlo_in-nzlo_out))*((nyhi_in-nylo_out) - (nylo_in-nylo_out) + 1)*((nxhi_in-nxlo_out) - (nxlo_in-nxlo_out) + 1)*2; // + d_work2[2*ii] = -d_fkz[ii]*d_work1[2*ii+1]; + d_work2[2*ii+1] = d_fkz[ii]*d_work1[2*ii]; } template KOKKOS_INLINE_FUNCTION -void PPPMKokkos::operator()(TagPPPM_poisson_ik_triclinic6, const int &/*k*/) const +void PPPMKokkos::operator()(TagPPPM_poisson_ik_triclinic6, const int &ii) const { // int n = (k - (nzlo_in-nzlo_out))*((nyhi_in-nylo_out) - (nylo_in-nylo_out) + 1)*((nxhi_in-nxlo_out) - (nxlo_in-nxlo_out) + 1)*2; // @@ -1976,6 +2109,14 @@ void PPPMKokkos::operator()(TagPPPM_poisson_ik_triclinic6, const int // d_vdz_brick(k,j,i) = d_work2[n]; // n += 2; // } + const int n = ii*2; + int k = ii/(numy_inout*numx_inout); + int j = (ii - k*numy_inout*numx_inout) / numx_inout; + int i = ii - k*numy_inout*numx_inout - j*numx_inout; + k += nzlo_in-nzlo_out; + j += nylo_in-nylo_out; + i += nxlo_in-nxlo_out; + d_vdz_brick(k,j,i) = d_work2[n]; } /* ---------------------------------------------------------------------- diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index 6b7b7f1f89..c92210b748 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -1,7 +1,6 @@ -// clang-format off /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://www.lammps.org/, Sandia National Laboratories + https://lammps.sandia.gov/, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract @@ -13,11 +12,11 @@ ------------------------------------------------------------------------- */ #ifdef KSPACE_CLASS -// clang-format off -KSpaceStyle(pppm/kk,PPPMKokkos); -KSpaceStyle(pppm/kk/device,PPPMKokkos); -KSpaceStyle(pppm/kk/host,PPPMKokkos); -// clang-format on + +KSpaceStyle(pppm/kk,PPPMKokkos) +KSpaceStyle(pppm/kk/device,PPPMKokkos) +KSpaceStyle(pppm/kk/host,PPPMKokkos) + #else #ifndef LMP_PPPM_KOKKOS_H @@ -29,6 +28,7 @@ KSpaceStyle(pppm/kk/host,PPPMKokkos); #include "kokkos_base_fft.h" #include "fftdata_kokkos.h" #include "kokkos_type.h" +#include "kokkos_few.h" // fix up FFT defines for KOKKOS with CUDA @@ -55,6 +55,8 @@ struct TagPPPM_setup1{}; struct TagPPPM_setup2{}; struct TagPPPM_setup3{}; struct TagPPPM_setup4{}; +struct TagPPPM_setup_triclinic1{}; +struct TagPPPM_setup_triclinic2{}; struct TagPPPM_compute_gf_ik{}; struct TagPPPM_compute_gf_ik_triclinic{}; struct TagPPPM_self1{}; @@ -138,6 +140,12 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { KOKKOS_INLINE_FUNCTION void operator()(TagPPPM_setup4, const int&) const; + KOKKOS_INLINE_FUNCTION + void operator()(TagPPPM_setup_triclinic1, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagPPPM_setup_triclinic2, const int&) const; + KOKKOS_INLINE_FUNCTION void operator()(TagPPPM_compute_gf_ik, const int&) const; @@ -309,6 +317,25 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { int numx_out,numy_out,numz_out; int ix,iy,nlocal; + // Local copies of the domain box tilt etc. + // TODO: Will need to put in a less + // hacky solution later, since this needs to be manually updated + Few h, h_inv; + + KOKKOS_INLINE_FUNCTION + void x2lamdaT(double* v, double* lamda) const + { + double lamda_tmp[3]; + + lamda_tmp[0] = h_inv[0]*v[0]; + lamda_tmp[1] = h_inv[5]*v[0] + h_inv[1]*v[1]; + lamda_tmp[2] = h_inv[4]*v[0] + h_inv[3]*v[1] + h_inv[2]*v[2]; + + lamda[0] = lamda_tmp[0]; + lamda[1] = lamda_tmp[1]; + lamda[2] = lamda_tmp[2]; + } + int nx,ny,nz; typename AT::t_int_1d_um d_list_index; typename FFT_AT::t_FFT_SCALAR_1d_um d_buf; From e400e5b6f7be6530db7ca63ca4f422d6dc4e8fee Mon Sep 17 00:00:00 2001 From: Emily Kahl Date: Wed, 14 Jul 2021 14:46:54 +1000 Subject: [PATCH 04/15] Fixed bug in PPPMKokkos::setup_triclinic for MPI calculations. This fix should probably be considered a temporary fix - it relies on a 3-dimensional Kokkos range which seems to be disfavoured in the rest of LAMMPS' codebase. --- src/KOKKOS/pppm_kokkos.cpp | 122 +++---------------------------------- src/KOKKOS/pppm_kokkos.h | 3 +- 2 files changed, 10 insertions(+), 115 deletions(-) diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 98b6492e48..3fe93f6825 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -455,7 +455,6 @@ void PPPMKokkos::operator()(TagPPPM_setup4, const int &n) const template void PPPMKokkos::setup_triclinic() { - int i,j,k,n; double *prd; // volume-dependent factors @@ -480,13 +479,9 @@ void PPPMKokkos::setup_triclinic() delzinv = nz_pppm; delvolinv = delxinv*delyinv*delzinv/volume; - // merge three outer loops into one for better threading - numz_fft = nzhi_fft-nzlo_fft + 1; - numy_fft = nyhi_fft-nylo_fft + 1; - numx_fft = nxhi_fft-nxlo_fft + 1; - const int inum_fft = numx_fft*numy_fft*numz_fft; copymode = 1; - Kokkos::parallel_for(Kokkos::RangePolicy(0,inum_fft),*this); + Kokkos::parallel_for(Kokkos::MDRangePolicy, DeviceType, TagPPPM_setup_triclinic1>\ + ({nzlo_fft, nylo_fft, nxlo_fft}, {nzhi_fft+1, nyhi_fft+1, nxhi_fft+1}),*this); copymode = 0; // virial coefficients @@ -500,14 +495,14 @@ void PPPMKokkos::setup_triclinic() template KOKKOS_INLINE_FUNCTION -void PPPMKokkos::operator()(TagPPPM_setup_triclinic1, const int &n) const +void PPPMKokkos::operator()(TagPPPM_setup_triclinic1, const int &k, const int &j, const int& i) const { - const int k = n/(numy_fft*numx_fft); - const int j = (n - k*numy_fft*numx_fft) / numx_fft; - const int i = n - k*numy_fft*numx_fft - j*numx_fft; double per_k = k - nz_pppm*(2*k/nz_pppm); double per_j = j - ny_pppm*(2*j/ny_pppm); double per_i = i - nx_pppm*(2*i/nx_pppm); + // n corresponds to the "number" of this iteration if we were to execute the loop monotonically and in serial + int n = (nxhi_fft - nxlo_fft + 1)*(nyhi_fft - nylo_fft + 1)*(k - nzlo_fft)+ + (nxhi_fft - nxlo_fft + 1)*(j - nylo_fft) + (i - nxlo_fft); double unitk_lamda[3]; unitk_lamda[0] = 2.0*MY_PI*per_i; @@ -1244,7 +1239,7 @@ void PPPMKokkos::compute_gf_ik() numz_fft = nzhi_fft-nzlo_fft + 1; numy_fft = nyhi_fft-nylo_fft + 1; numx_fft = nxhi_fft-nxlo_fft + 1; - const int inum_fft = numz_fft*numy_fft*numx_fft; + const int inum_fft = numx_fft*numy_fft*numz_fft; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,inum_fft),*this); @@ -1883,76 +1878,12 @@ void PPPMKokkos::operator()(TagPPPM_poisson_ik10, const int &ii) con template void PPPMKokkos::poisson_ik_triclinic() { -/************************************************** - int i,j,k,n; - // compute gradients of V(r) in each of 3 dims by transforming ik*V(k) // FFT leaves data in 3d brick decomposition // copy it into inner portion of vdx,vdy,vdz arrays // x direction gradient - n = 0; - for (i = 0; i < nfft; i++) { // parallel_for1 - d_work2[n] = -d_fkx[i]*d_work1[n+1]; - d_work2[n+1] = d_fkx[i]*d_work1[n]; - n += 2; - } - - fft2->compute(d_work2,d_work2,FFT3dKokkos::BACKWARD); - - n = 0; - for (k = nzlo_in-nzlo_out; k <= nzhi_in-nzlo_out; k++) // parallel_for2 - - - // y direction gradient - - n = 0; - for (i = 0; i < nfft; i++) { // parallel_for3 - d_work2[n] = -d_fky[i]*d_work1[n+1]; - d_work2[n+1] = d_fky[i]*d_work1[n]; - n += 2; - } - - fft2->compute(d_work2,d_work2,FFT3dKokkos::BACKWARD); - - n = 0; - for (k = nzlo_in-nzlo_out; k <= nzhi_in-nzlo_out; k++) // parallel_for4 - for (j = nylo_in-nylo_out; j <= nyhi_in-nylo_out; j++) - for (i = nxlo_in-nxlo_out; i <= nxhi_in-nxlo_out; i++) { - d_vdy_brick(k,j,i) = d_work2[n]; - n += 2; - } - - // z direction gradient - - n = 0; - for (i = 0; i < nfft; i++) { // parallel_for5 - d_work2[n] = -d_fkz[i]*d_work1[n+1]; - d_work2[n+1] = d_fkz[i]*d_work1[n]; - n += 2; - } - - fft2->compute(d_work2,d_work2,FFT3dKokkos::BACKWARD); - - n = 0; - for (k = nzlo_in-nzlo_out; k <= nzhi_in-nzlo_out; k++) // parallel_for6 -******************************/ - int i,j,k,n; - - // compute gradients of V(r) in each of 3 dims by transforming ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - //n = 0; - //for (i = 0; i < nfft; i++) { - // d_work2[n] = -d_fkx[i]*d_work1[n+1]; - // d_work2[n+1] = d_fkx[i]*d_work1[n]; - // n += 2; - //} - // merge three outer loops into one for better threading numz_fft = nzhi_fft-nzlo_fft + 1; @@ -1971,68 +1902,30 @@ void PPPMKokkos::poisson_ik_triclinic() fft2->compute(d_work2,d_work2,FFT3dKokkos::BACKWARD); - //n = 0; - //for (k = nzlo_in; k <= nzhi_in; k++) - // for (j = nylo_in; j <= nyhi_in; j++) - // for (i = nxlo_in; i <= nxhi_in; i++) { - // d_vdx_brick(k,j,i) = d_work2[n]; - // n += 2; - // } - copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,inum_inout),*this); copymode = 0; // y direction gradient - //n = 0; - //for (i = 0; i < nfft; i++) { - // d_work2[n] = -d_fky[i]*d_work1[n+1]; - // d_work2[n+1] = d_fky[i]*d_work1[n]; - // n += 2; - //} - copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,nfft),*this); copymode = 0; fft2->compute(d_work2,d_work2,FFT3dKokkos::BACKWARD); - //n = 0; - //for (k = nzlo_in; k <= nzhi_in; k++) - // for (j = nylo_in; j <= nyhi_in; j++) - // for (i = nxlo_in; i <= nxhi_in; i++) { - // d_vdy_brick(k,j,i) = d_work2[n]; - // n += 2; - // } - copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,inum_inout),*this); copymode = 0; // z direction gradient - //n = 0; - //for (i = 0; i < nfft; i++) { - // d_work2[n] = -d_fkz[i]*d_work1[n+1]; - // d_work2[n+1] = d_fkz[i]*d_work1[n]; - // n += 2; - //} - copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,nfft),*this); copymode = 0; fft2->compute(d_work2,d_work2,FFT3dKokkos::BACKWARD); - //n = 0; - //for (k = nzlo_in; k <= nzhi_in; k++) - // for (j = nylo_in; j <= nyhi_in; j++) - // for (i = nxlo_in; i <= nxhi_in; i++) { - // d_vdz_brick(k,j,i) = d_work2[n]; - // n += 2; - // } - copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,inum_inout),*this); copymode = 0; @@ -3040,3 +2933,4 @@ template class PPPMKokkos; template class PPPMKokkos; #endif } + diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index c92210b748..e7c55b1726 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -141,7 +141,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { void operator()(TagPPPM_setup4, const int&) const; KOKKOS_INLINE_FUNCTION - void operator()(TagPPPM_setup_triclinic1, const int&) const; + void operator()(TagPPPM_setup_triclinic1, const int&, const int&, const int&) const; KOKKOS_INLINE_FUNCTION void operator()(TagPPPM_setup_triclinic2, const int&) const; @@ -623,3 +623,4 @@ accuracy. This error should not occur for typical problems. Please send an email to the developers. */ + From e7ba4179a75ef9b05eababbe4ebf00d60f4fde0e Mon Sep 17 00:00:00 2001 From: Emily Kahl Date: Thu, 29 Jul 2021 09:53:08 +1000 Subject: [PATCH 05/15] Added Kokkos-enabled version of compute temp/deform. --- src/KOKKOS/compute_temp_deform_kokkos.cpp | 285 ++++++++++++++++++++++ src/KOKKOS/compute_temp_deform_kokkos.h | 127 ++++++++++ src/KOKKOS/compute_temp_kokkos.h | 23 +- src/compute_temp_deform.cpp | 9 +- 4 files changed, 430 insertions(+), 14 deletions(-) create mode 100644 src/KOKKOS/compute_temp_deform_kokkos.cpp create mode 100644 src/KOKKOS/compute_temp_deform_kokkos.h diff --git a/src/KOKKOS/compute_temp_deform_kokkos.cpp b/src/KOKKOS/compute_temp_deform_kokkos.cpp new file mode 100644 index 0000000000..6040504aeb --- /dev/null +++ b/src/KOKKOS/compute_temp_deform_kokkos.cpp @@ -0,0 +1,285 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Emily Kahl (UQ) +------------------------------------------------------------------------- */ + +#include "compute_temp_deform_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "update.h" +#include "memory_kokkos.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +ComputeTempDeformKokkos::ComputeTempDeformKokkos(LAMMPS *lmp, int narg, char **arg) : + ComputeTempDeform(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + domainKK = (DomainKokkos *) domain; + execution_space = ExecutionSpaceFromDevice::space; + + datamask_read = V_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK; + datamask_modify = EMPTY_MASK; + + maxbias = 0; +} + +template +ComputeTempDeformKokkos::~ComputeTempDeformKokkos() +{ + + +} + +/* ---------------------------------------------------------------------- */ + +template +double ComputeTempDeformKokkos::compute_scalar() +{ + atomKK->sync(execution_space,datamask_read); + atomKK->k_mass.sync(); + + invoked_scalar = update->ntimestep; + + v = atomKK->k_v.view(); + x = atomKK->k_x.view(); + if (atomKK->rmass) + rmass = atomKK->k_rmass.view(); + else + mass = atomKK->k_mass.view(); + type = atomKK->k_type.view(); + mask = atomKK->k_mask.view(); + int nlocal = atom->nlocal; + + double t = 0.0; + CTEMP t_kk; + + /**************** EVK ****************/ + // Convert from box coords to lamda coords + domainKK->x2lamda(nlocal); + + copymode = 1; + if (atomKK->rmass) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,nlocal),*this,t_kk); + else + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,nlocal),*this,t_kk); + copymode = 0; + + /**************** EVK ****************/ + // Convert back to box coords + domainKK->lamda2x(nlocal); + + t = t_kk.t0; // could make this more efficient + + MPI_Allreduce(&t,&scalar,1,MPI_DOUBLE,MPI_SUM,world); + if (dynamic) dof_compute(); + if (dof < 0.0 && natoms_temp > 0.0) + error->all(FLERR,"Temperature compute degrees of freedom < 0"); + scalar *= tfactor; + + return scalar; +} + +template +template +KOKKOS_INLINE_FUNCTION +void ComputeTempDeformKokkos::operator()(TagComputeTempDeformScalar, const int &i, CTEMP& t_kk) const { + + double *h_rate = domainKK->h_rate; + double *h_ratelo = domainKK->h_ratelo; + double vstream[3],vthermal[3]; + + vstream[0] = h_rate[0]*x(i,0) + h_rate[5]*x(i,1) + h_rate[4]*x(i,2) + h_ratelo[0]; + vstream[1] = h_rate[1]*x(i,1) + h_rate[3]*x(i,2) + h_ratelo[1]; + vstream[2] = h_rate[2]*x(i,2) + h_ratelo[2]; + vthermal[0] = v(i,0) - vstream[0]; + vthermal[1] = v(i,1) - vstream[1]; + vthermal[2] = v(i,2) - vstream[2]; + if (RMASS) { + if (mask[i] & groupbit) + t_kk.t0 += (vthermal[0]*vthermal[0] + vthermal[1]*vthermal[1] + vthermal[2]*vthermal[2]) * rmass[i]; + } else { + if (mask[i] & groupbit) + t_kk.t0 += (vthermal[0]*vthermal[0] + vthermal[1]*vthermal[1] + vthermal[2]*vthermal[2]) * mass[type[i]]; + } +} + +/* ---------------------------------------------------------------------- */ +template +void ComputeTempDeformKokkos::compute_vector() +{ + atomKK->sync(execution_space,datamask_read); + + int i; + + invoked_vector = update->ntimestep; + + v = atomKK->k_v.view(); + x = atomKK->k_x.view(); + if (atomKK->rmass) + rmass = atomKK->k_rmass.view(); + else + mass = atomKK->k_mass.view(); + type = atomKK->k_type.view(); + mask = atomKK->k_mask.view(); + int nlocal = atom->nlocal; + + double t[6]; + for (i = 0; i < 6; i++) t[i] = 0.0; + CTEMP t_kk; + + /**************** EVK ****************/ + // Convert from box coords to lamda coords + domainKK->x2lamda(nlocal); + + copymode = 1; + if (atomKK->rmass) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,nlocal),*this,t_kk); + else + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,nlocal),*this,t_kk); + copymode = 0; + + /**************** EVK ****************/ + // Convert back to box coords + domainKK->lamda2x(nlocal); + + t[0] = t_kk.t0; + t[1] = t_kk.t1; + t[2] = t_kk.t2; + t[3] = t_kk.t3; + t[4] = t_kk.t4; + t[5] = t_kk.t5; + + MPI_Allreduce(t,vector,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) vector[i] *= force->mvv2e; +} + +template +template +KOKKOS_INLINE_FUNCTION +void ComputeTempDeformKokkos::operator()(TagComputeTempDeformVector, const int &i, CTEMP& t_kk) const { + + double *h_rate = domainKK->h_rate; + double *h_ratelo = domainKK->h_ratelo; + double vstream[3],vthermal[3]; + + vstream[0] = h_rate[0]*x(i,0) + h_rate[5]*x(i,1) + h_rate[4]*x(i,2) + h_ratelo[0]; + vstream[1] = h_rate[1]*x(i,1) + h_rate[3]*x(i,2) + h_ratelo[1]; + vstream[2] = h_rate[2]*x(i,2) + h_ratelo[2]; + vthermal[0] = v(i,0) - vstream[0]; + vthermal[1] = v(i,1) - vstream[1]; + vthermal[2] = v(i,2) - vstream[2]; + + if (mask[i] & groupbit) { + F_FLOAT massone = 0.0; + if (RMASS) massone = rmass[i]; + else massone = mass[type[i]]; + t_kk.t0 += massone * vthermal[0]*vthermal[0]; + t_kk.t1 += massone * vthermal[1]*vthermal[1]; + t_kk.t2 += massone * vthermal[2]*vthermal[2]; + t_kk.t3 += massone * vthermal[0]*vthermal[1]; + t_kk.t4 += massone * vthermal[0]*vthermal[2]; + t_kk.t5 += massone * vthermal[1]*vthermal[2]; + } +} + +/* ---------------------------------------------------------------------- */ +template +void ComputeTempDeformKokkos::remove_bias_all() +{ + atomKK->sync(execution_space,datamask_read); + v = atomKK->k_v.view(); + x = atomKK->k_x.view(); + mask = atomKK->k_mask.view(); + int nlocal = atom->nlocal; + + if (atom->nmax > maxbias) { + //memoryKK->destroy_kokkos(vbiasall); + maxbias = atom->nmax; + //memoryKK->create_kokkos(vbiasall,maxbias,"temp/deform/kk:vbiasall"); + vbiasall = typename ArrayTypes::t_v_array("temp/deform/kk:vbiasall", maxbias); + } + + /**************** EVK ****************/ + // Convert from box coords to lamda coords + domainKK->x2lamda(nlocal); + + h_rate = domain->h_rate; + h_ratelo = domain->h_ratelo; + + copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + copymode = 0; + + /**************** EVK ****************/ + // Convert back to box coords + domainKK->lamda2x(nlocal); +} + +template +KOKKOS_INLINE_FUNCTION +void ComputeTempDeformKokkos::operator()(TagComputeTempDeformRemoveBias, const int &i) const { + if (mask[i] & groupbit) { + vbiasall(i,0) = h_rate[0]*x(i,0) + h_rate[5]*x(i,1) + h_rate[4]*x(i,2) + h_ratelo[0]; + vbiasall(i,1) = h_rate[1]*x(i,1) + h_rate[3]*x(i,2) + h_ratelo[1]; + vbiasall(i,2) = h_rate[2]*x(i,2) + h_ratelo[2]; + v(i,0) -= vbiasall(i,0); + v(i,1) -= vbiasall(i,1); + v(i,2) -= vbiasall(i,2); + } +} + +/* ---------------------------------------------------------------------- */ +template +void ComputeTempDeformKokkos::restore_bias_all() +{ + atomKK->sync(execution_space,datamask_read); + v = atomKK->k_v.view(); + x = atomKK->k_x.view(); + mask = atomKK->k_mask.view(); + int nlocal = atom->nlocal; + + copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + copymode = 0; +} + +template +KOKKOS_INLINE_FUNCTION +void ComputeTempDeformKokkos::operator()(TagComputeTempDeformRestoreBias, const int &i) const { + if (mask[i] & groupbit) { + v(i,0) += vbiasall(i,0); + v(i,1) += vbiasall(i,1); + v(i,2) += vbiasall(i,2); + } +} + +namespace LAMMPS_NS { +template class ComputeTempDeformKokkos; +#ifdef LMP_KOKKOS_GPU +template class ComputeTempDeformKokkos; +#endif +} diff --git a/src/KOKKOS/compute_temp_deform_kokkos.h b/src/KOKKOS/compute_temp_deform_kokkos.h new file mode 100644 index 0000000000..4ff6d59674 --- /dev/null +++ b/src/KOKKOS/compute_temp_deform_kokkos.h @@ -0,0 +1,127 @@ +// clang-format off +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef COMPUTE_CLASS +// clang-format off +ComputeStyle(temp/deform/kk,ComputeTempDeformKokkos); +ComputeStyle(temp/deform/kk/device,ComputeTempDeformKokkos); +ComputeStyle(temp/deform/kk/host,ComputeTempDeformKokkos); +// clang-format on +#else + +#ifndef LMP_COMPUTE_TEMP_DEFORM_KOKKOS_H +#define LMP_COMPUTE_TEMP_DEFORM_KOKKOS_H + +#include "compute_temp_deform.h" +#include "kokkos_type.h" +#include "domain_kokkos.h" +#include "kokkos_few.h" + +namespace LAMMPS_NS { + +template +struct TagComputeTempDeformScalar{}; + +template +struct TagComputeTempDeformVector{}; + +struct TagComputeTempDeformRemoveBias{}; + +struct TagComputeTempDeformRestoreBias{}; + +template +class ComputeTempDeformKokkos: public ComputeTempDeform { + public: + struct s_CTEMP { + double t0, t1, t2, t3, t4, t5; + KOKKOS_INLINE_FUNCTION + s_CTEMP() { + t0 = t1 = t2 = t3 = t4 = t5 = 0.0; + } + KOKKOS_INLINE_FUNCTION + s_CTEMP& operator+=(const s_CTEMP &rhs) { + t0 += rhs.t0; + t1 += rhs.t1; + t2 += rhs.t2; + t3 += rhs.t3; + t4 += rhs.t4; + t5 += rhs.t5; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator+=(const volatile s_CTEMP &rhs) volatile { + t0 += rhs.t0; + t1 += rhs.t1; + t2 += rhs.t2; + t3 += rhs.t3; + t4 += rhs.t4; + t5 += rhs.t5; + } + }; + + typedef s_CTEMP CTEMP; + typedef CTEMP value_type; + typedef DeviceType device_type; + typedef ArrayTypes AT; + + ComputeTempDeformKokkos(class LAMMPS *, int, char **); + ~ComputeTempDeformKokkos(); + double compute_scalar(); + void compute_vector(); + void remove_bias_all(); + void restore_bias_all(); + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagComputeTempDeformScalar, const int&, CTEMP&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagComputeTempDeformVector, const int&, CTEMP&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagComputeTempDeformRemoveBias, const int &i) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagComputeTempDeformRestoreBias, const int &i) const; + + protected: + typename ArrayTypes::t_x_array_randomread x; + typename ArrayTypes::t_v_array v; + typename ArrayTypes::t_v_array vbiasall; + typename ArrayTypes::t_float_1d_randomread rmass; + typename ArrayTypes::t_float_1d_randomread mass; + typename ArrayTypes::t_int_1d_randomread type; + typename ArrayTypes::t_int_1d_randomread mask; + + class DomainKokkos *domainKK; + + Few h_rate, h_ratelo; + + }; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Temperature compute degrees of freedom < 0 + +This should not happen if you are calculating the temperature +on a valid set of atoms. + +*/ diff --git a/src/KOKKOS/compute_temp_kokkos.h b/src/KOKKOS/compute_temp_kokkos.h index 792e2e17db..4cbace02d7 100644 --- a/src/KOKKOS/compute_temp_kokkos.h +++ b/src/KOKKOS/compute_temp_kokkos.h @@ -28,6 +28,16 @@ ComputeStyle(temp/kk/host,ComputeTempKokkos); namespace LAMMPS_NS { +template +struct TagComputeTempScalar{}; + +template +struct TagComputeTempVector{}; + +template +class ComputeTempKokkos : public ComputeTemp { + public: + struct s_CTEMP { double t0, t1, t2, t3, t4, t5; KOKKOS_INLINE_FUNCTION @@ -55,23 +65,14 @@ namespace LAMMPS_NS { t5 += rhs.t5; } }; + typedef s_CTEMP CTEMP; - -template -struct TagComputeTempScalar{}; - -template -struct TagComputeTempVector{}; - -template -class ComputeTempKokkos : public ComputeTemp { - public: typedef DeviceType device_type; typedef CTEMP value_type; typedef ArrayTypes AT; ComputeTempKokkos(class LAMMPS *, int, char **); - virtual ~ComputeTempKokkos() {} + virtual ~ComputeTempKokkos() {}; double compute_scalar(); void compute_vector(); diff --git a/src/compute_temp_deform.cpp b/src/compute_temp_deform.cpp index 0c158e96e6..7b17b12fd8 100644 --- a/src/compute_temp_deform.cpp +++ b/src/compute_temp_deform.cpp @@ -56,8 +56,11 @@ ComputeTempDeform::ComputeTempDeform(LAMMPS *lmp, int narg, char **arg) : ComputeTempDeform::~ComputeTempDeform() { - memory->destroy(vbiasall); - delete [] vector; + if (!copymode) + { + memory->destroy(vbiasall); + delete [] vector; + } } /* ---------------------------------------------------------------------- */ @@ -69,7 +72,7 @@ void ComputeTempDeform::init() // check fix deform remap settings for (i = 0; i < modify->nfix; i++) - if (strcmp(modify->fix[i]->style,"deform") == 0) { + if (strncmp(modify->fix[i]->style,"deform", 6) == 0) { if (((FixDeform *) modify->fix[i])->remapflag == Domain::X_REMAP && comm->me == 0) error->warning(FLERR,"Using compute temp/deform with inconsistent " From 8945d81be3e444b412860987b86e8853f6a010c3 Mon Sep 17 00:00:00 2001 From: Emily Kahl Date: Thu, 29 Jul 2021 09:54:15 +1000 Subject: [PATCH 06/15] Added Kokkos accelerated SLLOD thermostat (nvt/sllod/kk). --- src/KOKKOS/fix_nvt_sllod_kokkos.cpp | 171 ++++++++++++++++++++++++++++ src/KOKKOS/fix_nvt_sllod_kokkos.h | 97 ++++++++++++++++ 2 files changed, 268 insertions(+) create mode 100644 src/KOKKOS/fix_nvt_sllod_kokkos.cpp create mode 100644 src/KOKKOS/fix_nvt_sllod_kokkos.h diff --git a/src/KOKKOS/fix_nvt_sllod_kokkos.cpp b/src/KOKKOS/fix_nvt_sllod_kokkos.cpp new file mode 100644 index 0000000000..7d9ad3a61a --- /dev/null +++ b/src/KOKKOS/fix_nvt_sllod_kokkos.cpp @@ -0,0 +1,171 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/ + Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Emily Kahl (UQ, e.kahl@uq.edu.au) +------------------------------------------------------------------------- */ + +#include "fix_nvt_sllod_kokkos.h" + +#include "atom.h" +#include "compute.h" +#include "domain.h" +#include "error.h" +#include "fix.h" +#include "fix_deform_kokkos.h" +#include "group.h" +#include "math_extra.h" +#include "modify.h" +#include "atom_kokkos.h" +#include "atom.h" +#include "atom_masks.h" +#include "kokkos_few.h" +#include "memory_kokkos.h" + +#include + +using namespace LAMMPS_NS; +using namespace FixConst; + +/* ---------------------------------------------------------------------- */ + +template +FixNVTSllodKokkos::FixNVTSllodKokkos(LAMMPS *lmp, int narg, char **arg) : + FixNHKokkos(lmp, narg, arg) +{ + /************************ EVK *********************/ + atomKK = (AtomKokkos *) this->atom; + this->kokkosable = 1; + this->domainKK = (DomainKokkos *) this->domain; + + if (!this->tstat_flag) + this->error->all(FLERR,"Temperature control must be used with fix nvt/kk"); + if (this->pstat_flag) + this->error->all(FLERR,"Pressure control can not be used with fix nvt/kk"); + + if (this->mtchain_default_flag) this->mtchain = 1; + + this->id_temp = utils::strdup(std::string(this->id)+"_temp"); + this->modify->add_compute(fmt::format("{} all temp/deform/kk",this->id_temp)); + this->tcomputeflag = 1; +} + +/* ---------------------------------------------------------------------- */ + +template +FixNVTSllodKokkos::~FixNVTSllodKokkos() +{ + + +} + +/* ---------------------------------------------------------------------- */ + +template +void FixNVTSllodKokkos::init() +{ + FixNHKokkos::init(); + + vdelu = typename ArrayTypes::t_v_array("nvt/sllod/kk:vdelu", atomKK->nlocal); + + if (!this->temperature->tempbias) + this->error->all(FLERR,"Temperature for fix nvt/sllod does not have a bias"); + + nondeformbias = 0; + if (strncmp(this->temperature->style,"temp/deform", 11) != 0) nondeformbias = 1; + + // check fix deform remap settings + + int i; + for (i = 0; i < this->modify->nfix; i++) + if (strncmp(this->modify->fix[i]->style,"deform",6) == 0) { + if (((FixDeform *) this->modify->fix[i])->remapflag != Domain::V_REMAP) + this->error->all(FLERR,"Using fix nvt/sllod with inconsistent fix deform " + "remap option"); + break; + } + if (i == this->modify->nfix) + this->error->all(FLERR,"Using fix nvt/sllod with no fix deform defined"); +} + +/* ---------------------------------------------------------------------- + perform half-step scaling of velocities +-----------------------------------------------------------------------*/ + +template +void FixNVTSllodKokkos::nh_v_temp() +{ + // remove and restore bias = streaming velocity = Hrate*lamda + Hratelo + // thermostat thermal velocity only + // vdelu = SLLOD correction = Hrate*Hinv*vthermal + // for non temp/deform BIAS: + // calculate temperature since some computes require temp + // computed on current nlocal atoms to remove bias + + if (nondeformbias){ + atomKK->sync(this->temperature->execution_space,this->temperature->datamask_read); + this->temperature->compute_scalar(); + atomKK->modified(this->temperature->execution_space,this->temperature->datamask_modify); + } + v = atomKK->k_v.view(); + mask = atomKK->k_mask.view(); + int nlocal = atomKK->nlocal; + if (this->igroup == atomKK->firstgroup) nlocal = atomKK->nfirst; + + double h_two[6]; + MathExtra::multiply_shape_shape(this->domain->h_rate,this->domain->h_inv,h_two); + + d_h_two = Few(h_two); + + this->copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + this->copymode = 0; + + this->temperature->remove_bias_all(); + + this->copymode = 1; + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + this->copymode = 0; + + this->temperature->restore_bias_all(); +} + +template +KOKKOS_INLINE_FUNCTION +void FixNVTSllodKokkos::operator()(TagFixNVTSllod_temp1, const int &i) const { + if (mask[i] & this->groupbit) { + vdelu(i,0) = d_h_two[0]*v(i,0) + d_h_two[5]*v(i,1) + d_h_two[4]*v(i,2); + vdelu(i,1) = d_h_two[1]*v(i,1) + d_h_two[3]*v(i,2); + vdelu(i,2) = d_h_two[2]*v(i,2); + } +} + +template +KOKKOS_INLINE_FUNCTION +void FixNVTSllodKokkos::operator()(TagFixNVTSllod_temp2, const int &i) const { + if (mask[i] & this->groupbit) { + v(i,0) = v(i,0)*this->factor_eta - this->dthalf*vdelu(i,0); + v(i,1) = v(i,1)*this->factor_eta - this->dthalf*vdelu(i,1); + v(i,2) = v(i,2)*this->factor_eta - this->dthalf*vdelu(i,2); + } +} + +namespace LAMMPS_NS { +template class FixNVTSllodKokkos; +#ifdef LMP_KOKKOS_GPU +template class FixNVTSllodKokkos; +#endif +} + + diff --git a/src/KOKKOS/fix_nvt_sllod_kokkos.h b/src/KOKKOS/fix_nvt_sllod_kokkos.h new file mode 100644 index 0000000000..85b4e54bcd --- /dev/null +++ b/src/KOKKOS/fix_nvt_sllod_kokkos.h @@ -0,0 +1,97 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + www.cs.sandia.gov/~sjplimp/lammps.html + Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(nvt/sllod/kk,FixNVTSllodKokkos); +FixStyle(nvt/sllod/kk/device,FixNVTSllodKokkos); +FixStyle(nvt/sllod/kk/host,FixNVTSllodKokkos); +// clang-format on +#else + +#ifndef LMP_FIX_NVT_SLLOD_KOKKOS_H +#define LMP_FIX_NVT_SLLOD_KOKKOS_H + +#include "fix_nh_kokkos.h" +//#include "fix_nvt_sllod.h" +#include "kokkos_type.h" +#include "kokkos_few.h" + +namespace LAMMPS_NS { + +struct TagFixNVTSllod_temp1{}; +struct TagFixNVTSllod_temp2{}; + +template +class FixNVTSllodKokkos : public FixNHKokkos { + public: + FixNVTSllodKokkos(class LAMMPS *, int, char **); + ~FixNVTSllodKokkos(); + void init(); + + KOKKOS_INLINE_FUNCTION + void operator()(TagFixNVTSllod_temp1, const int& i) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagFixNVTSllod_temp2, const int& i) const; + + private: + int nondeformbias; + + void nh_v_temp(); + + protected: + typename ArrayTypes::t_x_array x; + typename ArrayTypes::t_v_array v; + typename ArrayTypes::t_v_array vdelu; + typename ArrayTypes::t_f_array_const f; + typename ArrayTypes::t_float_1d rmass; + typename ArrayTypes::t_float_1d mass; + typename ArrayTypes::t_int_1d type; + typename ArrayTypes::t_int_1d mask; + + Few d_h_two; + + class DomainKokkos *domainKK; + class AtomKokkos *atomKK; +}; + +} // namespace LAMMPS_NS + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Temperature control must be used with fix nvt/sllod + +Self-explanatory. + +E: Pressure control can not be used with fix nvt/sllod + +Self-explanatory. + +E: Temperature for fix nvt/sllod does not have a bias + +The specified compute must compute temperature with a bias. + +E: Using fix nvt/sllod with inconsistent fix deform remap option + +Fix nvt/sllod requires that deforming atoms have a velocity profile +provided by "remap v" as a fix deform option. + +E: Using fix nvt/sllod with no fix deform defined + +Self-explanatory. + +*/ From 8ae9d51466b49767399f2f71d1c22b4125fcb82c Mon Sep 17 00:00:00 2001 From: Emily Kahl Date: Mon, 2 Aug 2021 14:46:03 +1000 Subject: [PATCH 07/15] Fixed memory issues in ComputeTempDeformKokkos. --- src/KOKKOS/compute_temp_deform_kokkos.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/compute_temp_deform_kokkos.cpp b/src/KOKKOS/compute_temp_deform_kokkos.cpp index 6040504aeb..2716c4688e 100644 --- a/src/KOKKOS/compute_temp_deform_kokkos.cpp +++ b/src/KOKKOS/compute_temp_deform_kokkos.cpp @@ -80,6 +80,8 @@ double ComputeTempDeformKokkos::compute_scalar() /**************** EVK ****************/ // Convert from box coords to lamda coords domainKK->x2lamda(nlocal); + h_rate = domainKK->h_rate; + h_ratelo = domainKK->h_ratelo; copymode = 1; if (atomKK->rmass) @@ -108,8 +110,6 @@ template KOKKOS_INLINE_FUNCTION void ComputeTempDeformKokkos::operator()(TagComputeTempDeformScalar, const int &i, CTEMP& t_kk) const { - double *h_rate = domainKK->h_rate; - double *h_ratelo = domainKK->h_ratelo; double vstream[3],vthermal[3]; vstream[0] = h_rate[0]*x(i,0) + h_rate[5]*x(i,1) + h_rate[4]*x(i,2) + h_ratelo[0]; @@ -154,6 +154,8 @@ void ComputeTempDeformKokkos::compute_vector() /**************** EVK ****************/ // Convert from box coords to lamda coords domainKK->x2lamda(nlocal); + h_rate = domainKK->h_rate; + h_ratelo = domainKK->h_ratelo; copymode = 1; if (atomKK->rmass) @@ -182,8 +184,6 @@ template KOKKOS_INLINE_FUNCTION void ComputeTempDeformKokkos::operator()(TagComputeTempDeformVector, const int &i, CTEMP& t_kk) const { - double *h_rate = domainKK->h_rate; - double *h_ratelo = domainKK->h_ratelo; double vstream[3],vthermal[3]; vstream[0] = h_rate[0]*x(i,0) + h_rate[5]*x(i,1) + h_rate[4]*x(i,2) + h_ratelo[0]; From d7f9f9fead0744fb1ff07e50609fc16550d7e933 Mon Sep 17 00:00:00 2001 From: Emily Kahl Date: Mon, 9 Aug 2021 10:19:32 +1000 Subject: [PATCH 08/15] Updated documentation to include Kokkos accelerated NEMD styles. Also tidied up header files and attribution to fit LAMMPS coding style. --- doc/src/Commands_compute.rst | 2 +- doc/src/Commands_fix.rst | 2 +- doc/src/compute_temp_deform.rst | 3 +++ doc/src/fix_nvt_sllod.rst | 3 ++- src/KOKKOS/compute_temp_deform_kokkos.cpp | 3 ++- src/KOKKOS/compute_temp_deform_kokkos.h | 1 - src/KOKKOS/fix_nvt_sllod_kokkos.cpp | 2 +- 7 files changed, 10 insertions(+), 6 deletions(-) diff --git a/doc/src/Commands_compute.rst b/doc/src/Commands_compute.rst index 9dfb28fa8b..2c67d44f24 100644 --- a/doc/src/Commands_compute.rst +++ b/doc/src/Commands_compute.rst @@ -152,7 +152,7 @@ KOKKOS, o = OPENMP, t = OPT. * :doc:`temp/chunk ` * :doc:`temp/com ` * :doc:`temp/cs ` - * :doc:`temp/deform ` + * :doc:`temp/deform (k) ` * :doc:`temp/deform/eff ` * :doc:`temp/drude ` * :doc:`temp/eff ` diff --git a/doc/src/Commands_fix.rst b/doc/src/Commands_fix.rst index 45a75ff394..08857b96ec 100644 --- a/doc/src/Commands_fix.rst +++ b/doc/src/Commands_fix.rst @@ -148,7 +148,7 @@ OPT. * :doc:`nvt/body ` * :doc:`nvt/eff ` * :doc:`nvt/manifold/rattle ` - * :doc:`nvt/sllod (io) ` + * :doc:`nvt/sllod (iko) ` * :doc:`nvt/sllod/eff ` * :doc:`nvt/sphere (o) ` * :doc:`nvt/uef ` diff --git a/doc/src/compute_temp_deform.rst b/doc/src/compute_temp_deform.rst index bc4d1ae3f3..6c61f7bc93 100644 --- a/doc/src/compute_temp_deform.rst +++ b/doc/src/compute_temp_deform.rst @@ -1,8 +1,11 @@ .. index:: compute temp/deform +.. index:: compute temp/deform/kk compute temp/deform command =========================== +Accelerator Variants: *temp/deform/kk* + Syntax """""" diff --git a/doc/src/fix_nvt_sllod.rst b/doc/src/fix_nvt_sllod.rst index 9ff22bca09..3d041ca767 100644 --- a/doc/src/fix_nvt_sllod.rst +++ b/doc/src/fix_nvt_sllod.rst @@ -1,11 +1,12 @@ .. index:: fix nvt/sllod .. index:: fix nvt/sllod/intel .. index:: fix nvt/sllod/omp +.. index:: fix nvt/sllod/kk fix nvt/sllod command ===================== -Accelerator Variants: *nvt/sllod/intel*, *nvt/sllod/omp* +Accelerator Variants: *nvt/sllod/intel*, *nvt/sllod/omp*, *nvt/sllod/kk* Syntax """""" diff --git a/src/KOKKOS/compute_temp_deform_kokkos.cpp b/src/KOKKOS/compute_temp_deform_kokkos.cpp index 2716c4688e..7846e18113 100644 --- a/src/KOKKOS/compute_temp_deform_kokkos.cpp +++ b/src/KOKKOS/compute_temp_deform_kokkos.cpp @@ -13,7 +13,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing authors: Emily Kahl (UQ) + Contributing authors: Emily Kahl (Uni. of QLD, e.kahl@uq.edu.au) ------------------------------------------------------------------------- */ #include "compute_temp_deform_kokkos.h" @@ -25,6 +25,7 @@ #include "force.h" #include "update.h" #include "memory_kokkos.h" +#include "domain_kokkos.h" #include diff --git a/src/KOKKOS/compute_temp_deform_kokkos.h b/src/KOKKOS/compute_temp_deform_kokkos.h index 4ff6d59674..9f32069604 100644 --- a/src/KOKKOS/compute_temp_deform_kokkos.h +++ b/src/KOKKOS/compute_temp_deform_kokkos.h @@ -25,7 +25,6 @@ ComputeStyle(temp/deform/kk/host,ComputeTempDeformKokkos); #include "compute_temp_deform.h" #include "kokkos_type.h" -#include "domain_kokkos.h" #include "kokkos_few.h" namespace LAMMPS_NS { diff --git a/src/KOKKOS/fix_nvt_sllod_kokkos.cpp b/src/KOKKOS/fix_nvt_sllod_kokkos.cpp index 7d9ad3a61a..68574adff0 100644 --- a/src/KOKKOS/fix_nvt_sllod_kokkos.cpp +++ b/src/KOKKOS/fix_nvt_sllod_kokkos.cpp @@ -13,7 +13,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing authors: Emily Kahl (UQ, e.kahl@uq.edu.au) + Contributing authors: Emily Kahl (Uni. of QLD, e.kahl@uq.edu.au) ------------------------------------------------------------------------- */ #include "fix_nvt_sllod_kokkos.h" From b385c85440c5ab10af686282ee4bd9794f329231 Mon Sep 17 00:00:00 2001 From: Emily Kahl Date: Wed, 11 Aug 2021 14:05:45 +1000 Subject: [PATCH 09/15] Refactored PPPMKokkos::setup_triclinic kernel indexing to be more consistent the rest of the codebase. This commit "fixes" the temporary solution using Kokkos::MDRange in commit a98b8bee88. --- src/KOKKOS/pppm_kokkos.cpp | 20 ++++++++++++++------ src/KOKKOS/pppm_kokkos.h | 2 +- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 3fe93f6825..e2956036c3 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -479,9 +479,13 @@ void PPPMKokkos::setup_triclinic() delzinv = nz_pppm; delvolinv = delxinv*delyinv*delzinv/volume; + numz_fft = nzhi_fft-nzlo_fft + 1; + numy_fft = nyhi_fft-nylo_fft + 1; + numx_fft = nxhi_fft-nxlo_fft + 1; + const int inum_fft = numz_fft*numy_fft*numx_fft; + copymode = 1; - Kokkos::parallel_for(Kokkos::MDRangePolicy, DeviceType, TagPPPM_setup_triclinic1>\ - ({nzlo_fft, nylo_fft, nxlo_fft}, {nzhi_fft+1, nyhi_fft+1, nxhi_fft+1}),*this); + Kokkos::parallel_for(Kokkos::RangePolicy(0,inum_fft),*this); copymode = 0; // virial coefficients @@ -495,14 +499,18 @@ void PPPMKokkos::setup_triclinic() template KOKKOS_INLINE_FUNCTION -void PPPMKokkos::operator()(TagPPPM_setup_triclinic1, const int &k, const int &j, const int& i) const +void PPPMKokkos::operator()(TagPPPM_setup_triclinic1, const int &n) const { + int k = n/(numy_fft*numx_fft); + int j = (n - k*numy_fft*numx_fft) / numx_fft; + int i = n - k*numy_fft*numx_fft - j*numx_fft; + k += nzlo_fft; + j += nylo_fft; + i += nxlo_fft; + double per_k = k - nz_pppm*(2*k/nz_pppm); double per_j = j - ny_pppm*(2*j/ny_pppm); double per_i = i - nx_pppm*(2*i/nx_pppm); - // n corresponds to the "number" of this iteration if we were to execute the loop monotonically and in serial - int n = (nxhi_fft - nxlo_fft + 1)*(nyhi_fft - nylo_fft + 1)*(k - nzlo_fft)+ - (nxhi_fft - nxlo_fft + 1)*(j - nylo_fft) + (i - nxlo_fft); double unitk_lamda[3]; unitk_lamda[0] = 2.0*MY_PI*per_i; diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index e7c55b1726..ad8612e637 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -141,7 +141,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { void operator()(TagPPPM_setup4, const int&) const; KOKKOS_INLINE_FUNCTION - void operator()(TagPPPM_setup_triclinic1, const int&, const int&, const int&) const; + void operator()(TagPPPM_setup_triclinic1, const int&) const; KOKKOS_INLINE_FUNCTION void operator()(TagPPPM_setup_triclinic2, const int&) const; From 2e59b5c4de8eb195411abb8d32e7887c147e7ff7 Mon Sep 17 00:00:00 2001 From: Emily Kahl Date: Wed, 18 Aug 2021 15:18:55 +1000 Subject: [PATCH 10/15] Fixed whitespace errors and removed some extraneous comments. --- src/KOKKOS/compute_temp_deform_kokkos.cpp | 20 +++------------ src/KOKKOS/fix_nvt_sllod_kokkos.cpp | 11 ++++----- src/KOKKOS/fix_nvt_sllod_kokkos.h | 3 +-- src/KOKKOS/pppm_kokkos.cpp | 30 +++++++++++------------ src/KOKKOS/pppm_kokkos.h | 20 +++++++-------- 5 files changed, 33 insertions(+), 51 deletions(-) diff --git a/src/KOKKOS/compute_temp_deform_kokkos.cpp b/src/KOKKOS/compute_temp_deform_kokkos.cpp index 7846e18113..91d7d3f942 100644 --- a/src/KOKKOS/compute_temp_deform_kokkos.cpp +++ b/src/KOKKOS/compute_temp_deform_kokkos.cpp @@ -49,7 +49,7 @@ ComputeTempDeformKokkos::ComputeTempDeformKokkos(LAMMPS *lmp, int na } template -ComputeTempDeformKokkos::~ComputeTempDeformKokkos() +ComputeTempDeformKokkos::~ComputeTempDeformKokkos() { @@ -78,12 +78,10 @@ double ComputeTempDeformKokkos::compute_scalar() double t = 0.0; CTEMP t_kk; - /**************** EVK ****************/ - // Convert from box coords to lamda coords domainKK->x2lamda(nlocal); h_rate = domainKK->h_rate; h_ratelo = domainKK->h_ratelo; - + copymode = 1; if (atomKK->rmass) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,nlocal),*this,t_kk); @@ -91,11 +89,9 @@ double ComputeTempDeformKokkos::compute_scalar() Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,nlocal),*this,t_kk); copymode = 0; - /**************** EVK ****************/ - // Convert back to box coords domainKK->lamda2x(nlocal); - t = t_kk.t0; // could make this more efficient + t = t_kk.t0; MPI_Allreduce(&t,&scalar,1,MPI_DOUBLE,MPI_SUM,world); if (dynamic) dof_compute(); @@ -152,8 +148,6 @@ void ComputeTempDeformKokkos::compute_vector() for (i = 0; i < 6; i++) t[i] = 0.0; CTEMP t_kk; - /**************** EVK ****************/ - // Convert from box coords to lamda coords domainKK->x2lamda(nlocal); h_rate = domainKK->h_rate; h_ratelo = domainKK->h_ratelo; @@ -165,8 +159,6 @@ void ComputeTempDeformKokkos::compute_vector() Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,nlocal),*this,t_kk); copymode = 0; - /**************** EVK ****************/ - // Convert back to box coords domainKK->lamda2x(nlocal); t[0] = t_kk.t0; @@ -218,14 +210,10 @@ void ComputeTempDeformKokkos::remove_bias_all() int nlocal = atom->nlocal; if (atom->nmax > maxbias) { - //memoryKK->destroy_kokkos(vbiasall); maxbias = atom->nmax; - //memoryKK->create_kokkos(vbiasall,maxbias,"temp/deform/kk:vbiasall"); vbiasall = typename ArrayTypes::t_v_array("temp/deform/kk:vbiasall", maxbias); } - /**************** EVK ****************/ - // Convert from box coords to lamda coords domainKK->x2lamda(nlocal); h_rate = domain->h_rate; @@ -235,8 +223,6 @@ void ComputeTempDeformKokkos::remove_bias_all() Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); copymode = 0; - /**************** EVK ****************/ - // Convert back to box coords domainKK->lamda2x(nlocal); } diff --git a/src/KOKKOS/fix_nvt_sllod_kokkos.cpp b/src/KOKKOS/fix_nvt_sllod_kokkos.cpp index 68574adff0..1ed99cc77c 100644 --- a/src/KOKKOS/fix_nvt_sllod_kokkos.cpp +++ b/src/KOKKOS/fix_nvt_sllod_kokkos.cpp @@ -44,7 +44,6 @@ template FixNVTSllodKokkos::FixNVTSllodKokkos(LAMMPS *lmp, int narg, char **arg) : FixNHKokkos(lmp, narg, arg) { - /************************ EVK *********************/ atomKK = (AtomKokkos *) this->atom; this->kokkosable = 1; this->domainKK = (DomainKokkos *) this->domain; @@ -55,7 +54,7 @@ FixNVTSllodKokkos::FixNVTSllodKokkos(LAMMPS *lmp, int narg, char **a this->error->all(FLERR,"Pressure control can not be used with fix nvt/kk"); if (this->mtchain_default_flag) this->mtchain = 1; - + this->id_temp = utils::strdup(std::string(this->id)+"_temp"); this->modify->add_compute(fmt::format("{} all temp/deform/kk",this->id_temp)); this->tcomputeflag = 1; @@ -78,7 +77,7 @@ void FixNVTSllodKokkos::init() FixNHKokkos::init(); vdelu = typename ArrayTypes::t_v_array("nvt/sllod/kk:vdelu", atomKK->nlocal); - + if (!this->temperature->tempbias) this->error->all(FLERR,"Temperature for fix nvt/sllod does not have a bias"); @@ -112,8 +111,8 @@ void FixNVTSllodKokkos::nh_v_temp() // for non temp/deform BIAS: // calculate temperature since some computes require temp // computed on current nlocal atoms to remove bias - - if (nondeformbias){ + + if (nondeformbias){ atomKK->sync(this->temperature->execution_space,this->temperature->datamask_read); this->temperature->compute_scalar(); atomKK->modified(this->temperature->execution_space,this->temperature->datamask_modify); @@ -133,7 +132,7 @@ void FixNVTSllodKokkos::nh_v_temp() this->copymode = 0; this->temperature->remove_bias_all(); - + this->copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); this->copymode = 0; diff --git a/src/KOKKOS/fix_nvt_sllod_kokkos.h b/src/KOKKOS/fix_nvt_sllod_kokkos.h index 85b4e54bcd..15af600430 100644 --- a/src/KOKKOS/fix_nvt_sllod_kokkos.h +++ b/src/KOKKOS/fix_nvt_sllod_kokkos.h @@ -23,7 +23,6 @@ FixStyle(nvt/sllod/kk/host,FixNVTSllodKokkos); #define LMP_FIX_NVT_SLLOD_KOKKOS_H #include "fix_nh_kokkos.h" -//#include "fix_nvt_sllod.h" #include "kokkos_type.h" #include "kokkos_few.h" @@ -61,7 +60,7 @@ class FixNVTSllodKokkos : public FixNHKokkos { typename ArrayTypes::t_int_1d mask; Few d_h_two; - + class DomainKokkos *domainKK; class AtomKokkos *atomKK; }; diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index e2956036c3..b76e36c6e2 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -1326,7 +1326,7 @@ void PPPMKokkos::compute_gf_ik_triclinic() nby = static_cast (tmp[1]); nbz = static_cast (tmp[2]); - // Update the local copy of the domain box tilt + // Update the local copy of the domain box tilt h = Few(domain->h); h_inv = Few(domain->h_inv); @@ -1342,58 +1342,58 @@ KOKKOS_INLINE_FUNCTION void PPPMKokkos::operator()(TagPPPM_compute_gf_ik_triclinic, const int &m) const { int n = (m - nzlo_fft)*(nyhi_fft+1 - nylo_fft)*(nxhi_fft+1 - nxlo_fft); - + const int mper = m - nz_pppm*(2*m/nz_pppm); const double snz = square(sin(MY_PI*mper/nz_pppm)); - + for (int l = nylo_fft; l <= nyhi_fft; l++) { const int lper = l - ny_pppm*(2*l/ny_pppm); const double sny = square(sin(MY_PI*lper/ny_pppm)); - + for (int k = nxlo_fft; k <= nxhi_fft; k++) { const int kper = k - nx_pppm*(2*k/nx_pppm); const double snx = square(sin(MY_PI*kper/nx_pppm)); - + double unitk_lamda[3]; unitk_lamda[0] = 2.0*MY_PI*kper; unitk_lamda[1] = 2.0*MY_PI*lper; unitk_lamda[2] = 2.0*MY_PI*mper; x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - + const double sqk = square(unitk_lamda[0]) + square(unitk_lamda[1]) + square(unitk_lamda[2]); - + if (sqk != 0.0) { const double numerator = 12.5663706/sqk; const double denominator = gf_denom(snx,sny,snz); double sum1 = 0.0; - + for (int nx = -nbx; nx <= nbx; nx++) { const double argx = MY_PI*kper/nx_pppm + MY_PI*nx; const double wx = powsinxx(argx,twoorder); - + for (int ny = -nby; ny <= nby; ny++) { const double argy = MY_PI*lper/ny_pppm + MY_PI*ny; const double wy = powsinxx(argy,twoorder); - + for (int nz = -nbz; nz <= nbz; nz++) { const double argz = MY_PI*mper/nz_pppm + MY_PI*nz; const double wz = powsinxx(argz,twoorder); - + double b[3]; b[0] = 2.0*MY_PI*nx_pppm*nx; b[1] = 2.0*MY_PI*ny_pppm*ny; b[2] = 2.0*MY_PI*nz_pppm*nz; x2lamdaT(&b[0],&b[0]); - + const double qx = unitk_lamda[0]+b[0]; const double sx = exp(-0.25*square(qx/g_ewald)); - + const double qy = unitk_lamda[1]+b[1]; const double sy = exp(-0.25*square(qy/g_ewald)); - + const double qz = unitk_lamda[2]+b[2]; const double sz = exp(-0.25*square(qz/g_ewald)); - + const double dot1 = unitk_lamda[0]*qx + unitk_lamda[1]*qy + unitk_lamda[2]*qz; const double dot2 = qx*qx+qy*qy+qz*qz; sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index ad8612e637..f9e2b1a62a 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -317,22 +317,20 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { int numx_out,numy_out,numz_out; int ix,iy,nlocal; - // Local copies of the domain box tilt etc. - // TODO: Will need to put in a less - // hacky solution later, since this needs to be manually updated + // Local copies of the domain box tilt etc. Few h, h_inv; KOKKOS_INLINE_FUNCTION void x2lamdaT(double* v, double* lamda) const { - double lamda_tmp[3]; - - lamda_tmp[0] = h_inv[0]*v[0]; - lamda_tmp[1] = h_inv[5]*v[0] + h_inv[1]*v[1]; - lamda_tmp[2] = h_inv[4]*v[0] + h_inv[3]*v[1] + h_inv[2]*v[2]; - - lamda[0] = lamda_tmp[0]; - lamda[1] = lamda_tmp[1]; + double lamda_tmp[3]; + + lamda_tmp[0] = h_inv[0]*v[0]; + lamda_tmp[1] = h_inv[5]*v[0] + h_inv[1]*v[1]; + lamda_tmp[2] = h_inv[4]*v[0] + h_inv[3]*v[1] + h_inv[2]*v[2]; + + lamda[0] = lamda_tmp[0]; + lamda[1] = lamda_tmp[1]; lamda[2] = lamda_tmp[2]; } From 4876e0cbb697128ce4df8c8e4753c992efcda4d4 Mon Sep 17 00:00:00 2001 From: Emily Kahl Date: Wed, 18 Aug 2021 17:37:00 +1000 Subject: [PATCH 11/15] Changed URLs in the headers to point to the new LAMMPS site. --- src/KOKKOS/fix_nvt_sllod_kokkos.h | 4 ++-- src/KOKKOS/pppm_kokkos.cpp | 2 +- src/KOKKOS/pppm_kokkos.h | 7 ++++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/fix_nvt_sllod_kokkos.h b/src/KOKKOS/fix_nvt_sllod_kokkos.h index 15af600430..4e731c4b70 100644 --- a/src/KOKKOS/fix_nvt_sllod_kokkos.h +++ b/src/KOKKOS/fix_nvt_sllod_kokkos.h @@ -1,7 +1,7 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - www.cs.sandia.gov/~sjplimp/lammps.html - Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories + https://www.lammps.org/, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index b76e36c6e2..16cdce16de 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -1,6 +1,6 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://lammps.sandia.gov/, Sandia National Laboratories + https://lammps.org/, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index f9e2b1a62a..9a69a82762 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -1,6 +1,7 @@ +// clang-format off /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://lammps.sandia.gov/, Sandia National Laboratories + https://lammps.org/, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract @@ -12,11 +13,11 @@ ------------------------------------------------------------------------- */ #ifdef KSPACE_CLASS - +// clang-format off KSpaceStyle(pppm/kk,PPPMKokkos) KSpaceStyle(pppm/kk/device,PPPMKokkos) KSpaceStyle(pppm/kk/host,PPPMKokkos) - +// clang-format on #else #ifndef LMP_PPPM_KOKKOS_H From 511ac4994986cf269f135e3830d9a3ea8d6c4294 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 18 Aug 2021 05:53:23 -0400 Subject: [PATCH 12/15] reformat, minor cosmetic changes --- src/KOKKOS/compute_temp_deform_kokkos.cpp | 6 +- src/KOKKOS/compute_temp_deform_kokkos.h | 2 +- src/KOKKOS/compute_temp_kokkos.h | 5 +- src/KOKKOS/fix_nvt_sllod_kokkos.cpp | 28 ++--- src/KOKKOS/fix_nvt_sllod_kokkos.h | 5 +- src/KOKKOS/pppm_kokkos.cpp | 2 +- src/KOKKOS/pppm_kokkos.h | 11 +- src/compute_temp_deform.cpp | 130 +++++++++++----------- 8 files changed, 85 insertions(+), 104 deletions(-) diff --git a/src/KOKKOS/compute_temp_deform_kokkos.cpp b/src/KOKKOS/compute_temp_deform_kokkos.cpp index 91d7d3f942..1d7d087200 100644 --- a/src/KOKKOS/compute_temp_deform_kokkos.cpp +++ b/src/KOKKOS/compute_temp_deform_kokkos.cpp @@ -21,13 +21,11 @@ #include "atom_kokkos.h" #include "atom_masks.h" #include "comm.h" +#include "domain_kokkos.h" #include "error.h" #include "force.h" -#include "update.h" #include "memory_kokkos.h" -#include "domain_kokkos.h" - -#include +#include "update.h" using namespace LAMMPS_NS; diff --git a/src/KOKKOS/compute_temp_deform_kokkos.h b/src/KOKKOS/compute_temp_deform_kokkos.h index 9f32069604..8b53c1f633 100644 --- a/src/KOKKOS/compute_temp_deform_kokkos.h +++ b/src/KOKKOS/compute_temp_deform_kokkos.h @@ -24,8 +24,8 @@ ComputeStyle(temp/deform/kk/host,ComputeTempDeformKokkos); #define LMP_COMPUTE_TEMP_DEFORM_KOKKOS_H #include "compute_temp_deform.h" -#include "kokkos_type.h" #include "kokkos_few.h" +#include "kokkos_type.h" namespace LAMMPS_NS { diff --git a/src/KOKKOS/compute_temp_kokkos.h b/src/KOKKOS/compute_temp_kokkos.h index 4cbace02d7..ae6ab485d2 100644 --- a/src/KOKKOS/compute_temp_kokkos.h +++ b/src/KOKKOS/compute_temp_kokkos.h @@ -1,4 +1,3 @@ -// clang-format off /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories @@ -19,13 +18,13 @@ ComputeStyle(temp/kk/device,ComputeTempKokkos); ComputeStyle(temp/kk/host,ComputeTempKokkos); // clang-format on #else - #ifndef LMP_COMPUTE_TEMP_KOKKOS_H #define LMP_COMPUTE_TEMP_KOKKOS_H #include "compute_temp.h" #include "kokkos_type.h" +// clang-format off namespace LAMMPS_NS { template @@ -72,7 +71,7 @@ class ComputeTempKokkos : public ComputeTemp { typedef ArrayTypes AT; ComputeTempKokkos(class LAMMPS *, int, char **); - virtual ~ComputeTempKokkos() {}; + virtual ~ComputeTempKokkos() {} double compute_scalar(); void compute_vector(); diff --git a/src/KOKKOS/fix_nvt_sllod_kokkos.cpp b/src/KOKKOS/fix_nvt_sllod_kokkos.cpp index 1ed99cc77c..d0af72f17f 100644 --- a/src/KOKKOS/fix_nvt_sllod_kokkos.cpp +++ b/src/KOKKOS/fix_nvt_sllod_kokkos.cpp @@ -19,21 +19,19 @@ #include "fix_nvt_sllod_kokkos.h" #include "atom.h" +#include "atom.h" +#include "atom_kokkos.h" +#include "atom_masks.h" #include "compute.h" #include "domain.h" #include "error.h" #include "fix.h" #include "fix_deform_kokkos.h" #include "group.h" -#include "math_extra.h" -#include "modify.h" -#include "atom_kokkos.h" -#include "atom.h" -#include "atom_masks.h" #include "kokkos_few.h" +#include "math_extra.h" #include "memory_kokkos.h" - -#include +#include "modify.h" using namespace LAMMPS_NS; using namespace FixConst; @@ -62,15 +60,6 @@ FixNVTSllodKokkos::FixNVTSllodKokkos(LAMMPS *lmp, int narg, char **a /* ---------------------------------------------------------------------- */ -template -FixNVTSllodKokkos::~FixNVTSllodKokkos() -{ - - -} - -/* ---------------------------------------------------------------------- */ - template void FixNVTSllodKokkos::init() { @@ -82,16 +71,15 @@ void FixNVTSllodKokkos::init() this->error->all(FLERR,"Temperature for fix nvt/sllod does not have a bias"); nondeformbias = 0; - if (strncmp(this->temperature->style,"temp/deform", 11) != 0) nondeformbias = 1; + if (utils::strmatch(this->temperature->style,"^temp/deform")) nondeformbias = 1; // check fix deform remap settings int i; for (i = 0; i < this->modify->nfix; i++) - if (strncmp(this->modify->fix[i]->style,"deform",6) == 0) { + if (utils::strmatch(this->modify->fix[i]->style,"^deform")) { if (((FixDeform *) this->modify->fix[i])->remapflag != Domain::V_REMAP) - this->error->all(FLERR,"Using fix nvt/sllod with inconsistent fix deform " - "remap option"); + this->error->all(FLERR,"Using fix nvt/sllod with inconsistent fix deform remap option"); break; } if (i == this->modify->nfix) diff --git a/src/KOKKOS/fix_nvt_sllod_kokkos.h b/src/KOKKOS/fix_nvt_sllod_kokkos.h index 4e731c4b70..6057ce44d0 100644 --- a/src/KOKKOS/fix_nvt_sllod_kokkos.h +++ b/src/KOKKOS/fix_nvt_sllod_kokkos.h @@ -23,9 +23,10 @@ FixStyle(nvt/sllod/kk/host,FixNVTSllodKokkos); #define LMP_FIX_NVT_SLLOD_KOKKOS_H #include "fix_nh_kokkos.h" -#include "kokkos_type.h" #include "kokkos_few.h" +#include "kokkos_type.h" +// clang-format off namespace LAMMPS_NS { struct TagFixNVTSllod_temp1{}; @@ -35,7 +36,7 @@ template class FixNVTSllodKokkos : public FixNHKokkos { public: FixNVTSllodKokkos(class LAMMPS *, int, char **); - ~FixNVTSllodKokkos(); + ~FixNVTSllodKokkos() {} void init(); KOKKOS_INLINE_FUNCTION diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 16cdce16de..a7f58f2525 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -1,6 +1,6 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://lammps.org/, Sandia National Laboratories + https://www.lammps.org/, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index 9a69a82762..9a333135b7 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -1,7 +1,6 @@ -// clang-format off /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://lammps.org/, Sandia National Laboratories + https://www.lammps.org/, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract @@ -14,9 +13,9 @@ #ifdef KSPACE_CLASS // clang-format off -KSpaceStyle(pppm/kk,PPPMKokkos) -KSpaceStyle(pppm/kk/device,PPPMKokkos) -KSpaceStyle(pppm/kk/host,PPPMKokkos) +KSpaceStyle(pppm/kk,PPPMKokkos); +KSpaceStyle(pppm/kk/device,PPPMKokkos); +KSpaceStyle(pppm/kk/host,PPPMKokkos); // clang-format on #else @@ -31,6 +30,8 @@ KSpaceStyle(pppm/kk/host,PPPMKokkos) #include "kokkos_type.h" #include "kokkos_few.h" +// clang-format off + // fix up FFT defines for KOKKOS with CUDA #ifdef KOKKOS_ENABLE_CUDA diff --git a/src/compute_temp_deform.cpp b/src/compute_temp_deform.cpp index 7b17b12fd8..915e682219 100644 --- a/src/compute_temp_deform.cpp +++ b/src/compute_temp_deform.cpp @@ -1,4 +1,3 @@ -// clang-format off /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories @@ -18,27 +17,25 @@ #include "compute_temp_deform.h" -#include -#include "domain.h" #include "atom.h" -#include "update.h" -#include "force.h" -#include "modify.h" +#include "comm.h" +#include "domain.h" +#include "error.h" #include "fix.h" #include "fix_deform.h" +#include "force.h" #include "group.h" -#include "comm.h" #include "memory.h" -#include "error.h" +#include "modify.h" +#include "update.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -ComputeTempDeform::ComputeTempDeform(LAMMPS *lmp, int narg, char **arg) : - Compute(lmp, narg, arg) +ComputeTempDeform::ComputeTempDeform(LAMMPS *lmp, int narg, char **arg) : Compute(lmp, narg, arg) { - if (narg != 3) error->all(FLERR,"Illegal compute temp/deform command"); + if (narg != 3) error->all(FLERR, "Illegal compute temp/deform command"); scalar_flag = vector_flag = 1; size_vector = 6; @@ -56,10 +53,9 @@ ComputeTempDeform::ComputeTempDeform(LAMMPS *lmp, int narg, char **arg) : ComputeTempDeform::~ComputeTempDeform() { - if (!copymode) - { + if (!copymode) { memory->destroy(vbiasall); - delete [] vector; + delete[] vector; } } @@ -72,16 +68,14 @@ void ComputeTempDeform::init() // check fix deform remap settings for (i = 0; i < modify->nfix; i++) - if (strncmp(modify->fix[i]->style,"deform", 6) == 0) { - if (((FixDeform *) modify->fix[i])->remapflag == Domain::X_REMAP && - comm->me == 0) - error->warning(FLERR,"Using compute temp/deform with inconsistent " - "fix deform remap option"); + if (utils::strmatch(modify->fix[i]->style, "^deform")) { + if (((FixDeform *) modify->fix[i])->remapflag == Domain::X_REMAP && comm->me == 0) + error->warning(FLERR, + "Using compute temp/deform with inconsistent fix deform remap option"); break; } if (i == modify->nfix && comm->me == 0) - error->warning(FLERR, - "Using compute temp/deform with no fix deform defined"); + error->warning(FLERR, "Using compute temp/deform with no fix deform defined"); } /* ---------------------------------------------------------------------- */ @@ -101,15 +95,17 @@ void ComputeTempDeform::dof_compute() natoms_temp = group->count(igroup); dof = domain->dimension * natoms_temp; dof -= extra_dof + fix_dof; - if (dof > 0) tfactor = force->mvv2e / (dof * force->boltz); - else tfactor = 0.0; + if (dof > 0) + tfactor = force->mvv2e / (dof * force->boltz); + else + tfactor = 0.0; } /* ---------------------------------------------------------------------- */ double ComputeTempDeform::compute_scalar() { - double lamda[3],vstream[3],vthermal[3]; + double lamda[3], vstream[3], vthermal[3]; invoked_scalar = update->ntimestep; @@ -132,26 +128,25 @@ double ComputeTempDeform::compute_scalar() for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { - domain->x2lamda(x[i],lamda); - vstream[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] + - h_rate[4]*lamda[2] + h_ratelo[0]; - vstream[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1]; - vstream[2] = h_rate[2]*lamda[2] + h_ratelo[2]; + domain->x2lamda(x[i], lamda); + vstream[0] = h_rate[0] * lamda[0] + h_rate[5] * lamda[1] + h_rate[4] * lamda[2] + h_ratelo[0]; + vstream[1] = h_rate[1] * lamda[1] + h_rate[3] * lamda[2] + h_ratelo[1]; + vstream[2] = h_rate[2] * lamda[2] + h_ratelo[2]; vthermal[0] = v[i][0] - vstream[0]; vthermal[1] = v[i][1] - vstream[1]; vthermal[2] = v[i][2] - vstream[2]; if (rmass) - t += (vthermal[0]*vthermal[0] + vthermal[1]*vthermal[1] + - vthermal[2]*vthermal[2]) * rmass[i]; + t += (vthermal[0] * vthermal[0] + vthermal[1] * vthermal[1] + vthermal[2] * vthermal[2]) * + rmass[i]; else - t += (vthermal[0]*vthermal[0] + vthermal[1]*vthermal[1] + - vthermal[2]*vthermal[2]) * mass[type[i]]; + t += (vthermal[0] * vthermal[0] + vthermal[1] * vthermal[1] + vthermal[2] * vthermal[2]) * + mass[type[i]]; } - MPI_Allreduce(&t,&scalar,1,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(&t, &scalar, 1, MPI_DOUBLE, MPI_SUM, world); if (dynamic) dof_compute(); if (dof < 0.0 && natoms_temp > 0.0) - error->all(FLERR,"Temperature compute degrees of freedom < 0"); + error->all(FLERR, "Temperature compute degrees of freedom < 0"); scalar *= tfactor; return scalar; } @@ -160,7 +155,7 @@ double ComputeTempDeform::compute_scalar() void ComputeTempDeform::compute_vector() { - double lamda[3],vstream[3],vthermal[3]; + double lamda[3], vstream[3], vthermal[3]; invoked_vector = update->ntimestep; @@ -175,31 +170,32 @@ void ComputeTempDeform::compute_vector() double *h_rate = domain->h_rate; double *h_ratelo = domain->h_ratelo; - double massone,t[6]; + double massone, t[6]; for (int i = 0; i < 6; i++) t[i] = 0.0; for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { - domain->x2lamda(x[i],lamda); - vstream[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] + - h_rate[4]*lamda[2] + h_ratelo[0]; - vstream[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1]; - vstream[2] = h_rate[2]*lamda[2] + h_ratelo[2]; + domain->x2lamda(x[i], lamda); + vstream[0] = h_rate[0] * lamda[0] + h_rate[5] * lamda[1] + h_rate[4] * lamda[2] + h_ratelo[0]; + vstream[1] = h_rate[1] * lamda[1] + h_rate[3] * lamda[2] + h_ratelo[1]; + vstream[2] = h_rate[2] * lamda[2] + h_ratelo[2]; vthermal[0] = v[i][0] - vstream[0]; vthermal[1] = v[i][1] - vstream[1]; vthermal[2] = v[i][2] - vstream[2]; - if (rmass) massone = rmass[i]; - else massone = mass[type[i]]; - t[0] += massone * vthermal[0]*vthermal[0]; - t[1] += massone * vthermal[1]*vthermal[1]; - t[2] += massone * vthermal[2]*vthermal[2]; - t[3] += massone * vthermal[0]*vthermal[1]; - t[4] += massone * vthermal[0]*vthermal[2]; - t[5] += massone * vthermal[1]*vthermal[2]; + if (rmass) + massone = rmass[i]; + else + massone = mass[type[i]]; + t[0] += massone * vthermal[0] * vthermal[0]; + t[1] += massone * vthermal[1] * vthermal[1]; + t[2] += massone * vthermal[2] * vthermal[2]; + t[3] += massone * vthermal[0] * vthermal[1]; + t[4] += massone * vthermal[0] * vthermal[2]; + t[5] += massone * vthermal[1] * vthermal[2]; } - MPI_Allreduce(t,vector,6,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(t, vector, 6, MPI_DOUBLE, MPI_SUM, world); for (int i = 0; i < 6; i++) vector[i] *= force->mvv2e; } @@ -213,11 +209,10 @@ void ComputeTempDeform::remove_bias(int i, double *v) double *h_rate = domain->h_rate; double *h_ratelo = domain->h_ratelo; - domain->x2lamda(atom->x[i],lamda); - vbias[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] + - h_rate[4]*lamda[2] + h_ratelo[0]; - vbias[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1]; - vbias[2] = h_rate[2]*lamda[2] + h_ratelo[2]; + domain->x2lamda(atom->x[i], lamda); + vbias[0] = h_rate[0] * lamda[0] + h_rate[5] * lamda[1] + h_rate[4] * lamda[2] + h_ratelo[0]; + vbias[1] = h_rate[1] * lamda[1] + h_rate[3] * lamda[2] + h_ratelo[1]; + vbias[2] = h_rate[2] * lamda[2] + h_ratelo[2]; v[0] -= vbias[0]; v[1] -= vbias[1]; v[2] -= vbias[2]; @@ -233,11 +228,10 @@ void ComputeTempDeform::remove_bias_thr(int i, double *v, double *b) double *h_rate = domain->h_rate; double *h_ratelo = domain->h_ratelo; - domain->x2lamda(atom->x[i],lamda); - b[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] + - h_rate[4]*lamda[2] + h_ratelo[0]; - b[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1]; - b[2] = h_rate[2]*lamda[2] + h_ratelo[2]; + domain->x2lamda(atom->x[i], lamda); + b[0] = h_rate[0] * lamda[0] + h_rate[5] * lamda[1] + h_rate[4] * lamda[2] + h_ratelo[0]; + b[1] = h_rate[1] * lamda[1] + h_rate[3] * lamda[2] + h_ratelo[1]; + b[2] = h_rate[2] * lamda[2] + h_ratelo[2]; v[0] -= b[0]; v[1] -= b[1]; v[2] -= b[2]; @@ -256,7 +250,7 @@ void ComputeTempDeform::remove_bias_all() if (atom->nmax > maxbias) { memory->destroy(vbiasall); maxbias = atom->nmax; - memory->create(vbiasall,maxbias,3,"temp/deform:vbiasall"); + memory->create(vbiasall, maxbias, 3, "temp/deform:vbiasall"); } double lamda[3]; @@ -265,11 +259,11 @@ void ComputeTempDeform::remove_bias_all() for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { - domain->x2lamda(atom->x[i],lamda); - vbiasall[i][0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] + - h_rate[4]*lamda[2] + h_ratelo[0]; - vbiasall[i][1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1]; - vbiasall[i][2] = h_rate[2]*lamda[2] + h_ratelo[2]; + domain->x2lamda(atom->x[i], lamda); + vbiasall[i][0] = + h_rate[0] * lamda[0] + h_rate[5] * lamda[1] + h_rate[4] * lamda[2] + h_ratelo[0]; + vbiasall[i][1] = h_rate[1] * lamda[1] + h_rate[3] * lamda[2] + h_ratelo[1]; + vbiasall[i][2] = h_rate[2] * lamda[2] + h_ratelo[2]; v[i][0] -= vbiasall[i][0]; v[i][1] -= vbiasall[i][1]; v[i][2] -= vbiasall[i][2]; @@ -323,6 +317,6 @@ void ComputeTempDeform::restore_bias_all() double ComputeTempDeform::memory_usage() { - double bytes = 3*maxbias * sizeof(double); + double bytes = 3 * maxbias * sizeof(double); return bytes; } From 3dc142c0b00d9860c3ceb908a23cf0a28ff80cba Mon Sep 17 00:00:00 2001 From: Emily Kahl Date: Tue, 24 Aug 2021 16:23:05 +1000 Subject: [PATCH 13/15] Added fix_nvt_sllod_kokkos and compute_temp_deform_kokkos to Install.sh --- src/KOKKOS/Install.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 5e7285a75f..e23dd11500 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -43,7 +43,7 @@ elif (test $mode = 0) then fi fi -# list of files with optional dependcies +# list of files with optional dependencies action angle_charmm_kokkos.cpp angle_charmm.cpp action angle_charmm_kokkos.h angle_charmm.h @@ -94,6 +94,8 @@ action compute_orientorder_atom_kokkos.cpp action compute_orientorder_atom_kokkos.h action compute_temp_kokkos.cpp action compute_temp_kokkos.h +action compute_temp_deform_kokkos.cpp +action compute_temp_deform_kokkos.h action dihedral_charmm_kokkos.cpp dihedral_charmm.cpp action dihedral_charmm_kokkos.h dihedral_charmm.h action dihedral_class2_kokkos.cpp dihedral_class2.cpp @@ -135,6 +137,8 @@ action fix_nve_sphere_kokkos.cpp action fix_nve_sphere_kokkos.h action fix_nvt_kokkos.cpp action fix_nvt_kokkos.h +action fix_nvt_sllod_kokkos.cpp +action fix_nvt_sllod_kokkos.h action fix_property_atom_kokkos.cpp action fix_property_atom_kokkos.h action fix_qeq_reaxff_kokkos.cpp fix_qeq_reaxff.cpp From 862cb43fa9a4c3d8dbb1219245ff6f605df93912 Mon Sep 17 00:00:00 2001 From: Vsevak Date: Tue, 24 Aug 2021 17:07:51 +0300 Subject: [PATCH 14/15] Enable unittest for GPU lj/cut/tip4p/long --- unittest/force-styles/tests/mol-pair-lj_cut_tip4p_long.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/unittest/force-styles/tests/mol-pair-lj_cut_tip4p_long.yaml b/unittest/force-styles/tests/mol-pair-lj_cut_tip4p_long.yaml index 2e76bdcb57..d1bed9430b 100644 --- a/unittest/force-styles/tests/mol-pair-lj_cut_tip4p_long.yaml +++ b/unittest/force-styles/tests/mol-pair-lj_cut_tip4p_long.yaml @@ -2,7 +2,6 @@ lammps_version: 10 Feb 2021 date_generated: Fri Feb 26 23:08:49 2021 epsilon: 1e-13 -skip_tests: gpu prerequisites: ! | atom full pair lj/cut/tip4p/long From a26da031aa830bee204432807cbc34afe50bd5f5 Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Tue, 24 Aug 2021 11:27:30 -0400 Subject: [PATCH 15/15] Use .coveragerc to configure Python coverage reporting --- cmake/.coveragerc.in | 10 ++++++++++ cmake/Modules/CodeCoverage.cmake | 10 ++++++---- unittest/python/CMakeLists.txt | 2 +- 3 files changed, 17 insertions(+), 5 deletions(-) create mode 100644 cmake/.coveragerc.in diff --git a/cmake/.coveragerc.in b/cmake/.coveragerc.in new file mode 100644 index 0000000000..3dc467d4d0 --- /dev/null +++ b/cmake/.coveragerc.in @@ -0,0 +1,10 @@ +[run] +source = @LAMMPS_PYTHON_DIR@ +parallel=True +branch=True +omit=*/install.py + */setup.py + +[paths] +sources = python + @LAMMPS_PYTHON_DIR@ diff --git a/cmake/Modules/CodeCoverage.cmake b/cmake/Modules/CodeCoverage.cmake index 054e08fc1a..21a651e519 100644 --- a/cmake/Modules/CodeCoverage.cmake +++ b/cmake/Modules/CodeCoverage.cmake @@ -54,6 +54,8 @@ if(ENABLE_COVERAGE) if(COVERAGE_FOUND) set(PYTHON_COVERAGE_HTML_DIR ${CMAKE_BINARY_DIR}/python_coverage_html) + configure_file(.coveragerc.in ${CMAKE_BINARY_DIR}/.coveragerc @ONLY) + add_custom_command( OUTPUT ${CMAKE_BINARY_DIR}/unittest/python/.coverage COMMAND ${COVERAGE_BINARY} combine @@ -63,16 +65,16 @@ if(ENABLE_COVERAGE) add_custom_target( gen_python_coverage_html - COMMAND ${COVERAGE_BINARY} html -d ${PYTHON_COVERAGE_HTML_DIR} - DEPENDS ${CMAKE_BINARY_DIR}/unittest/python/.coverage + COMMAND ${COVERAGE_BINARY} html --rcfile=${CMAKE_BINARY_DIR}/.coveragerc -d ${PYTHON_COVERAGE_HTML_DIR} + DEPENDS ${CMAKE_BINARY_DIR}/unittest/python/.coverage ${CMAKE_BINARY_DIR}/.coveragerc WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/unittest/python COMMENT "Generating HTML Python coverage report..." ) add_custom_target( gen_python_coverage_xml - COMMAND ${COVERAGE_BINARY} xml -o ${CMAKE_BINARY_DIR}/python_coverage.xml - DEPENDS ${CMAKE_BINARY_DIR}/unittest/python/.coverage + COMMAND ${COVERAGE_BINARY} xml --rcfile=${CMAKE_BINARY_DIR}/.coveragerc -o ${CMAKE_BINARY_DIR}/python_coverage.xml + DEPENDS ${CMAKE_BINARY_DIR}/unittest/python/.coverage ${CMAKE_BINARY_DIR}/.coveragerc WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/unittest/python COMMENT "Generating XML Python coverage report..." ) diff --git a/unittest/python/CMakeLists.txt b/unittest/python/CMakeLists.txt index 6832f7e028..d76f73aceb 100644 --- a/unittest/python/CMakeLists.txt +++ b/unittest/python/CMakeLists.txt @@ -46,7 +46,7 @@ if(Python_EXECUTABLE) find_package_handle_standard_args(COVERAGE DEFAULT_MSG COVERAGE_BINARY) if(COVERAGE_FOUND) - set(PYTHON_TEST_RUNNER ${Python_EXECUTABLE} -u ${COVERAGE_BINARY} run --parallel-mode --include=${LAMMPS_PYTHON_DIR}/lammps/*.py --omit=${LAMMPS_PYTHON_DIR}/install.py) + set(PYTHON_TEST_RUNNER ${Python_EXECUTABLE} -u ${COVERAGE_BINARY} run --rcfile=${CMAKE_BINARY_DIR}/.coveragerc) else() set(PYTHON_TEST_RUNNER ${Python_EXECUTABLE} -u) endif()