diff --git a/src/Depend.sh b/src/Depend.sh index 8046fd2636..98f3e5de4f 100755 --- a/src/Depend.sh +++ b/src/Depend.sh @@ -127,6 +127,10 @@ if (test $1 = "MANYBODY") then depend OPENMP fi +if (test $1 = "MEAM") then + depend KOKKOS +fi + if (test $1 = "MOLECULE") then depend EXTRA-MOLECULE depend GPU diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 022da7855a..952f21e403 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -207,6 +207,13 @@ action nbin_ssa_kokkos.cpp nbin_ssa.cpp action nbin_ssa_kokkos.h nbin_ssa.h action math_special_kokkos.cpp action math_special_kokkos.h +action meam_setup_done_kokkos.h meam_setup_done.cpp +action meam_kokkos.h meam.h +action meam_impl_kokkos.h meam_impl.cpp +action meam_funcs_kokkos.h meam_funcs.cpp +action meam_force_kokkos.h meam_force.cpp +action meam_dens_init_kokkos.h meam_dens_init.cpp +action meam_dens_final_kokkos.h meam_dens_final.cpp action min_cg_kokkos.cpp action min_cg_kokkos.h action min_kokkos.cpp @@ -287,6 +294,8 @@ action pair_lj_gromacs_kokkos.cpp pair_lj_gromacs.cpp action pair_lj_gromacs_kokkos.h pair_lj_gromacs.h action pair_lj_sdk_kokkos.cpp pair_lj_sdk.cpp action pair_lj_sdk_kokkos.h pair_lj_sdk.h +action pair_meam_kokkos.h pair_meam.h +action pair_meam_kokkos.cpp pair_meam.cpp action pair_morse_kokkos.cpp action pair_morse_kokkos.h action pair_multi_lucy_rx_kokkos.cpp pair_multi_lucy_rx.cpp diff --git a/src/KOKKOS/math_special_kokkos.h b/src/KOKKOS/math_special_kokkos.h index 802d1c2979..f2d23a65eb 100644 --- a/src/KOKKOS/math_special_kokkos.h +++ b/src/KOKKOS/math_special_kokkos.h @@ -50,6 +50,18 @@ namespace MathSpecialKokkos { #endif } +#define FM_DOUBLE_LOG2OFE 1.4426950408889634074 + // fast e**x function for little endian CPUs, falls back to libc on other platforms + KOKKOS_INLINE_FUNCTION + static double fm_exp(double x) + { +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + return exp2_x86(FM_DOUBLE_LOG2OFE * x); +#else + return ::exp(x); +#endif + } + // x**2, use instead of pow(x,2.0) KOKKOS_INLINE_FUNCTION static double square(const double &x) { return x*x; } diff --git a/src/KOKKOS/meam_dens_final_kokkos.h b/src/KOKKOS/meam_dens_final_kokkos.h new file mode 100644 index 0000000000..b44a8af3bd --- /dev/null +++ b/src/KOKKOS/meam_dens_final_kokkos.h @@ -0,0 +1,181 @@ +#include "meam_kokkos.h" +#include "math_special.h" + +using namespace LAMMPS_NS; + +template +void +MEAMKokkos::meam_dens_final(int nlocal, int eflag_either, int eflag_global, int eflag_atom, double* eng_vdwl, + typename ArrayTypes::t_efloat_1d eatom, int ntype, typename AT::t_int_1d_randomread type, typename AT::t_int_1d_randomread fmap, int& errorflag) +{ + EV_FLOAT ev; + ev.evdwl = *eng_vdwl; + this->eflag_either = eflag_either; + this->eflag_global = eflag_global; + this->eflag_atom = eflag_atom; + this->d_eatom = eatom; + this->ntype = ntype; + this->type = type; + this->fmap = fmap; + + // Complete the calculation of density + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,ev); + *eng_vdwl = ev.evdwl; +} + +template +KOKKOS_INLINE_FUNCTION +void MEAMKokkos::operator()(TagMEAMDensFinal, const int &i, EV_FLOAT& ev) const { + + lattice_t elti; + int m; + int errorflag; + F_FLOAT rhob, G, dG, Gbar, dGbar, gam, shp[3], Z; + F_FLOAT B, denom, rho_bkgd; + + // may or may not be legal + elti = static_cast(fmap[type[i]]); + if (elti >= 0) { + d_rho1[i] = 0.0; + d_rho2[i] = -1.0 / 3.0 * d_arho2b[i] * d_arho2b[i]; + d_rho3[i] = 0.0; + for (m = 0; m < 3; m++) { + d_rho1[i] = d_rho1[i] + d_arho1(i,m) * d_arho1(i,m); + d_rho3[i] = d_rho3[i] - 3.0 / 5.0 * d_arho3b(i,m) * d_arho3b(i,m); + } + for (m = 0; m < 6; m++) { + d_rho2[i] = d_rho2[i] + v2D[m] * d_arho2(i,m) * d_arho2(i,m); + } + for (m = 0; m < 10; m++) { + d_rho3[i] = d_rho3[i] + v3D[m] * d_arho3(i,m) * d_arho3(i,m); + } + + if (d_rho0[i] > 0.0) { + if (this->ialloy == 1) { + d_t_ave(i,0) = fdiv_zero_kk(d_t_ave(i,0), d_tsq_ave(i,0)); + d_t_ave(i,1) = fdiv_zero_kk(d_t_ave(i,1), d_tsq_ave(i,1)); + d_t_ave(i,2) = fdiv_zero_kk(d_t_ave(i,2), d_tsq_ave(i,2)); + } else if (this->ialloy == 2) { + d_t_ave(i,0) = this->t1_meam[elti]; + d_t_ave(i,1) = this->t2_meam[elti]; + d_t_ave(i,2) = this->t3_meam[elti]; + } else { + d_t_ave(i,0) = d_t_ave(i,0) / d_rho0[i]; + d_t_ave(i,1) = d_t_ave(i,1) / d_rho0[i]; + d_t_ave(i,2) = d_t_ave(i,2) / d_rho0[i]; + } + } + + d_gamma[i] = d_t_ave(i,0) * d_rho1[i] + d_t_ave(i,1) * d_rho2[i] + d_t_ave(i,2) * d_rho3[i]; + + if (d_rho0[i] > 0.0) { + d_gamma[i] = d_gamma[i] / (d_rho0[i] * d_rho0[i]); + } + + // need to double check, the analogous function is + // Z = get_Zij(this->lattce_meam[elti][elti]); in the non-KOKKOS version? + Z = get_Zij(elti); + + G = G_gam(d_gamma[i], this->ibar_meam[elti], errorflag); + if (errorflag != 0) + { + //char str[128]; + //sprintf(str,"MEAMKokkos library error %d",errorflag); + //error->one(FLERR,str); + return; + } + get_shpfcn(this->lattce_meam[elti][elti], shp); + if (this->ibar_meam[elti] <= 0) { + Gbar = 1.0; + dGbar = 0.0; + } else { + if (this->mix_ref_t == 1) { + gam = (d_t_ave(i,0) * shp[0] + d_t_ave(i,1) * shp[1] + d_t_ave(i,2) * shp[2]) / (Z * Z); + } else { + gam = (this->t1_meam[elti] * shp[0] + this->t2_meam[elti] * shp[1] + this->t3_meam[elti] * shp[2]) / + (Z * Z); + } + Gbar = G_gam(gam, this->ibar_meam[elti], errorflag); + } + d_rho[i] = d_rho0[i] * G; + + if (this->mix_ref_t == 1) { + if (this->ibar_meam[elti] <= 0) { + Gbar = 1.0; + dGbar = 0.0; + } else { + gam = (d_t_ave(i,0) * shp[0] + d_t_ave(i,1) * shp[1] + d_t_ave(i,2) * shp[2]) / (Z * Z); + Gbar = dG_gam(gam, this->ibar_meam[elti], dGbar); + } + rho_bkgd = this->rho0_meam[elti] * Z * Gbar; + } else { + if (this->bkgd_dyn == 1) { + rho_bkgd = this->rho0_meam[elti] * Z; + } else { + rho_bkgd = this->rho_ref_meam[elti]; + } + } + rhob = d_rho[i] / rho_bkgd; + denom = 1.0 / rho_bkgd; + + G = dG_gam(d_gamma[i], this->ibar_meam[elti], dG); + + d_dgamma1[i] = (G - 2 * dG * d_gamma[i]) * denom; + + if (!iszero_kk(d_rho0[i])) { + d_dgamma2[i] = (dG / d_rho0[i]) * denom; + } else { + d_dgamma2[i] = 0.0; + } + + // dgamma3 is nonzero only if we are using the "mixed" rule for + // computing t in the reference system (which is not correct, but + // included for backward compatibility + if (this->mix_ref_t == 1) { + d_dgamma3[i] = d_rho0[i] * G * dGbar / (Gbar * Z * Z) * denom; + } else { + d_dgamma3[i] = 0.0; + } + + B = this->A_meam[elti] * this->Ec_meam[elti][elti]; + + if (!iszero_kk(rhob)) { + if (this->emb_lin_neg == 1 && rhob <= 0) { + d_frhop[i] = -B; + } else { + d_frhop[i] = B * (log(rhob) + 1.0); + } + if (eflag_either != 0) { + if (eflag_global != 0) { + if (this->emb_lin_neg == 1 && rhob <= 0) { + //*eng_vdwl = *eng_vdwl - B * rhob; + ev.evdwl = ev.evdwl - B * rhob; + } else { + //*eng_vdwl = *eng_vdwl + B * rhob * log(rhob); + ev.evdwl = ev.evdwl + B * rhob * log(rhob); + } + } + if (eflag_atom != 0) { + if (this->emb_lin_neg == 1 && rhob <= 0) { + d_eatom[i] = d_eatom[i] - B * rhob; + } else { + d_eatom[i] = d_eatom[i] + B * rhob * log(rhob); + } + } + } + } else { + if (this->emb_lin_neg == 1) { + d_frhop[i] = -B; + } else { + d_frhop[i] = B; + } + } + } + if (errorflag) + { + //char str[128]; + //sprintf(str,"MEAMKokkos library error %d",errorflag); + //error->one(FLERR,str); + } +} diff --git a/src/KOKKOS/meam_dens_init_kokkos.h b/src/KOKKOS/meam_dens_init_kokkos.h new file mode 100644 index 0000000000..c3dfdbbcb8 --- /dev/null +++ b/src/KOKKOS/meam_dens_init_kokkos.h @@ -0,0 +1,554 @@ +#include "meam_kokkos.h" +#include "math_special.h" +#include "mpi.h" + +using namespace LAMMPS_NS; +using namespace MathSpecialKokkos; + +template +template +KOKKOS_INLINE_FUNCTION +void MEAMKokkos::operator()(TagMEAMDensInit, const int &i, EV_FLOAT &ev) const { + int ii, offsetval; + ii = d_ilist_half[i]; + offsetval = d_offset[i]; + // Compute screening function and derivatives + this->template getscreen(ii, offsetval, x, d_numneigh_half, + d_numneigh_full, ntype, type, fmap); + + // Calculate intermediate density terms to be communicated + this->template calc_rho1(ii, ntype, type, fmap, x, d_numneigh_half, offsetval); + ev.evdwl += d_numneigh_half[i]; +} + +template +KOKKOS_INLINE_FUNCTION +void MEAMKokkos::operator()(TagMEAMInitialize, const int &i) const { + d_rho0[i] = 0.0; + d_arho2b[i] = 0.0; + d_arho1(i,0) = d_arho1(i,1) = d_arho1(i,2) = 0.0; + for (int j = 0; j < 6; j++) + d_arho2(i,j) = 0.0; + for (int j = 0; j < 10; j++) + d_arho3(i,j) = 0.0; + d_arho3b(i,0) = d_arho3b(i,1) = d_arho3b(i,2) = 0.0; + d_t_ave(i,0) = d_t_ave(i,1) = d_t_ave(i,2) = 0.0; + d_tsq_ave(i,0) = d_tsq_ave(i,1) = d_tsq_ave(i,2) = 0.0; +} + +template +void +MEAMKokkos::meam_dens_setup(int atom_nmax, int nall, int n_neigh) +{ + MemoryKokkos *memoryKK = (MemoryKokkos *)memory; + + // grow local arrays if necessary + + if (atom_nmax > nmax) { + memoryKK->destroy_kokkos(k_rho,rho); + memoryKK->destroy_kokkos(k_rho0,rho0); + memoryKK->destroy_kokkos(k_rho1,rho1); + memoryKK->destroy_kokkos(k_rho2,rho2); + memoryKK->destroy_kokkos(k_rho3,rho3); + memoryKK->destroy_kokkos(k_frhop,frhop); + memoryKK->destroy_kokkos(k_gamma,gamma); + memoryKK->destroy_kokkos(k_dgamma1,dgamma1); + memoryKK->destroy_kokkos(k_dgamma2,dgamma2); + memoryKK->destroy_kokkos(k_dgamma3,dgamma3); + memoryKK->destroy_kokkos(k_arho2b,arho2b); + memoryKK->destroy_kokkos(k_arho1,arho1); + memoryKK->destroy_kokkos(k_arho2,arho2); + memoryKK->destroy_kokkos(k_arho3,arho3); + memoryKK->destroy_kokkos(k_arho3b,arho3b); + memoryKK->destroy_kokkos(k_t_ave,t_ave); + memoryKK->destroy_kokkos(k_tsq_ave,tsq_ave); + + nmax = atom_nmax; +// memory->create(rho, nmax, "pair:rho"); + k_rho = DAT::tdual_ffloat_1d("pair:rho",nmax); + d_rho = k_rho.template view(); + h_rho = k_rho.h_view; + // memory->create(rho0, nmax, "pair:rho0"); + k_rho0 = DAT::tdual_ffloat_1d("pair:rho0",nmax); + d_rho0 = k_rho0.template view(); + h_rho0 = k_rho0.h_view; + //memory->create(rho1, nmax, "pair:rho1"); + k_rho1 = DAT::tdual_ffloat_1d("pair:rho1",nmax); + d_rho1 = k_rho1.template view(); + h_rho1 = k_rho1.h_view; + //memory->create(rho2, nmax, "pair:rho2"); + k_rho2 = DAT::tdual_ffloat_1d("pair:rho2",nmax); + d_rho2 = k_rho2.template view(); + h_rho2 = k_rho2.h_view; + //memory->create(rho3, nmax, "pair:rho3"); + k_rho3 = DAT::tdual_ffloat_1d("pair:rho3",nmax); + d_rho3 = k_rho3.template view(); + h_rho3 = k_rho3.h_view; + //memory->create(frhop, nmax, "pair:frhop"); + k_frhop = DAT::tdual_ffloat_1d("pair:frhop",nmax); + d_frhop = k_frhop.template view(); + h_frhop = k_frhop.h_view; + //memory->create(gamma, nmax, "pair:gamma"); + k_gamma = DAT::tdual_ffloat_1d("pair:gamma",nmax); + d_gamma = k_gamma.template view(); + h_gamma = k_gamma.h_view; + //memory->create(dgamma1, nmax, "pair:dgamma1"); + k_dgamma1 = DAT::tdual_ffloat_1d("pair:dgamma1",nmax); + d_dgamma1 = k_dgamma1.template view(); + h_dgamma1 = k_dgamma1.h_view; + //memory->create(dgamma2, nmax, "pair:dgamma2"); + k_dgamma2 = DAT::tdual_ffloat_1d("pair:dgamma2",nmax); + d_dgamma2 = k_dgamma2.template view(); + h_dgamma2 = k_dgamma2.h_view; + //memory->create(dgamma3, nmax, "pair:dgamma3"); + k_dgamma3 = DAT::tdual_ffloat_1d("pair:dgamma3",nmax); + d_dgamma3 = k_dgamma3.template view(); + h_dgamma3 = k_dgamma3.h_view; + //memory->create(arho2b, nmax, "pair:arho2b"); + k_arho2b = DAT::tdual_ffloat_1d("pair:arho2b",nmax); + d_arho2b = k_arho2b.template view(); + h_arho2b = k_arho2b.h_view; + //memory->create(arho1, nmax, 3, "pair:arho1"); + k_arho1 = DAT::tdual_ffloat_2d("pair:arho1",nmax, 3); + d_arho1 = k_arho1.template view(); + h_arho1 = k_arho1.h_view; + //memory->create(arho2, nmax, 6, "pair:arho2"); + k_arho2 = DAT::tdual_ffloat_2d("pair:arho2",nmax, 6); + d_arho2 = k_arho2.template view(); + h_arho2 = k_arho2.h_view; + //memory->create(arho3, nmax, 10, "pair:arho3"); + k_arho3 = DAT::tdual_ffloat_2d("pair:arho3",nmax, 10); + d_arho3 = k_arho3.template view(); + h_arho3 = k_arho3.h_view; + //memory->create(arho3b, nmax, 3, "pair:arho3b"); + k_arho3b = DAT::tdual_ffloat_2d("pair:arho3b",nmax, 3); + d_arho3b = k_arho3b.template view(); + h_arho3b = k_arho3b.h_view; + //memory->create(t_ave, nmax, 3, "pair:t_ave"); + k_t_ave = DAT::tdual_ffloat_2d("pair:t_ave",nmax, 3); + d_t_ave = k_t_ave.template view(); + h_t_ave = k_t_ave.h_view; + //memory->create(tsq_ave, nmax, 3, "pair:tsq_ave"); + k_tsq_ave = DAT::tdual_ffloat_2d("pair:tsq_ave",nmax, 3); + d_tsq_ave = k_tsq_ave.template view(); + h_tsq_ave = k_tsq_ave.h_view; + } + + if (n_neigh > maxneigh) { + memoryKK->destroy_kokkos(k_scrfcn,scrfcn); + memoryKK->destroy_kokkos(k_dscrfcn,dscrfcn); + memoryKK->destroy_kokkos(k_fcpair,fcpair); + maxneigh = n_neigh; + // memory->create(scrfcn, maxneigh, "pair:scrfcn"); + k_scrfcn = DAT::tdual_ffloat_1d("pair:scrfcn", maxneigh); + d_scrfcn = k_scrfcn.template view(); + h_scrfcn = k_scrfcn.h_view; + //memory->create(dscrfcn, maxneigh, "pair:dscrfcn"); + k_dscrfcn = DAT::tdual_ffloat_1d("pair:dscrfcn", maxneigh); + d_dscrfcn = k_dscrfcn.template view(); + h_dscrfcn = k_dscrfcn.h_view; + //memory->create(fcpair, maxneigh, "pair:fcpair"); + k_fcpair = DAT::tdual_ffloat_1d("pair:fcpair", maxneigh); + d_fcpair = k_fcpair.template view(); + h_fcpair = k_fcpair.h_view; + } + + // zero out local arrays + + Kokkos::parallel_for(Kokkos::RangePolicy(0, nall),*this); +} + +template +void +MEAMKokkos::meam_dens_init(int inum_half, int ntype, typename AT::t_int_1d_randomread type, typename AT::t_int_1d_randomread fmap, typename AT::t_x_array_randomread x, typename AT::t_int_1d_randomread d_numneigh_half, typename AT::t_int_1d_randomread d_numneigh_full, + int *fnoffset, typename AT::t_int_1d_randomread d_ilist_half, typename AT::t_neighbors_2d d_neighbors_half, typename AT::t_neighbors_2d d_neighbors_full, typename AT::t_int_1d_randomread d_offset, int neighflag) +{ + EV_FLOAT ev; + + ev.evdwl = 0; + this->ntype = ntype; + this->type = type; + this->fmap = fmap; + this->x = x; + this->d_numneigh_half = d_numneigh_half; + this->d_numneigh_full = d_numneigh_full; + this->d_ilist_half = d_ilist_half; + this->d_neighbors_half = d_neighbors_half; + this->d_neighbors_full = d_neighbors_full; + this->d_offset = d_offset; + if (neighflag == FULL) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum_half),*this, ev); + else if (neighflag == HALF) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum_half),*this, ev); + else if (neighflag == HALFTHREAD) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum_half),*this, ev); + *fnoffset = (int)ev.evdwl; +} + +// ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +template +template +KOKKOS_INLINE_FUNCTION +void +MEAMKokkos::getscreen(int i, int offset, typename AT::t_x_array_randomread x, typename AT::t_int_1d_randomread numneigh_half, + typename AT::t_int_1d_randomread numneigh_full, int /*ntype*/, typename AT::t_int_1d_randomread type, typename AT::t_int_1d_randomread fmap) +const { + int jn, j, kn, k; + int elti, eltj, eltk; + X_FLOAT xitmp, yitmp, zitmp, delxij, delyij, delzij; + F_FLOAT rij2, rij; + X_FLOAT xjtmp, yjtmp, zjtmp, delxik, delyik, delzik; + F_FLOAT rik2 /*,rik*/; + X_FLOAT xktmp, yktmp, zktmp, delxjk, delyjk, delzjk; + F_FLOAT rjk2 /*,rjk*/; + X_FLOAT xik, xjk; + F_FLOAT sij, fcij, sfcij, dfcij, sikj, dfikj, cikj; + F_FLOAT Cmin, Cmax, delc, /*ebound,*/ a, coef1, coef2; + F_FLOAT dCikj; + F_FLOAT rnorm, fc, dfc, drinv; + + drinv = 1.0 / this->delr_meam; + elti = fmap[type[i]]; + if (elti < 0) return; + + xitmp = x(i,0); + yitmp = x(i,1); + zitmp = x(i,2); + + for (jn = 0; jn < numneigh_half[i]; jn++) { + //j = firstneigh[jn]; + j = d_neighbors_half(i,jn); + + eltj = fmap[type[j]]; + if (eltj < 0) continue; + + // First compute screening function itself, sij + xjtmp = x(j,0); + yjtmp = x(j,1); + zjtmp = x(j,2); + delxij = xjtmp - xitmp; + delyij = yjtmp - yitmp; + delzij = zjtmp - zitmp; + + rij2 = delxij * delxij + delyij * delyij + delzij * delzij; + rij = sqrt(rij2); + + double rbound = this->ebound_meam[elti][eltj] * rij2; + if (rij > this->rc_meam) { + fcij = 0.0; + dfcij = 0.0; + sij = 0.0; + } else { + rnorm = (this->rc_meam - rij) * drinv; + sij = 1.0; + + // if rjk2 > ebound*rijsq, atom k is definitely outside the ellipse + for (kn = 0; kn < numneigh_full[i]; kn++) { + //k = firstneigh_full[kn]; + k = d_neighbors_full(i,kn); + eltk = fmap[type[k]]; + if (eltk < 0) continue; + if (k == j) continue; + + delxjk = x(k,0) - xjtmp; + delyjk = x(k,1) - yjtmp; + delzjk = x(k,2) - zjtmp; + rjk2 = delxjk * delxjk + delyjk * delyjk + delzjk * delzjk; + if (rjk2 > rbound) continue; + + delxik = x(k,0) - xitmp; + delyik = x(k,1) - yitmp; + delzik = x(k,2) - zitmp; + rik2 = delxik * delxik + delyik * delyik + delzik * delzik; + if (rik2 > rbound) continue; + + xik = rik2 / rij2; + xjk = rjk2 / rij2; + a = 1 - (xik - xjk) * (xik - xjk); + // if a < 0, then ellipse equation doesn't describe this case and + // atom k can't possibly screen i-j + if (a <= 0.0) continue; + + cikj = (2.0 * (xik + xjk) + a - 2.0) / a; + Cmax = this->Cmax_meam[elti][eltj][eltk]; + Cmin = this->Cmin_meam[elti][eltj][eltk]; + if (cikj >= Cmax) continue; + // note that cikj may be slightly negative (within numerical + // tolerance) if atoms are colinear, so don't reject that case here + // (other negative cikj cases were handled by the test on "a" above) + else if (cikj <= Cmin) { + sij = 0.0; + break; + } else { + delc = Cmax - Cmin; + cikj = (cikj - Cmin) / delc; + sikj = fcut(cikj); + } + sij *= sikj; + } + + fc = dfcut(rnorm, dfc); + fcij = fc; + dfcij = dfc * drinv; + } + // Now compute derivatives + d_dscrfcn[offset+jn] = 0.0; + sfcij = sij * fcij; + if (iszero_kk(sfcij) || iszero_kk(sfcij - 1.0)) + { + d_scrfcn[offset+jn] = sij; + d_fcpair[offset+jn] = fcij; + continue; + //goto LABEL_100; + } + + for (kn = 0; kn < numneigh_full[i]; kn++) { + //k = firstneigh_full[kn]; + k = d_neighbors_full(i,kn); + if (k == j) continue; + eltk = fmap[type[k]]; + if (eltk < 0) continue; + + xktmp = x(k,0); + yktmp = x(k,1); + zktmp = x(k,2); + delxjk = xktmp - xjtmp; + delyjk = yktmp - yjtmp; + delzjk = zktmp - zjtmp; + rjk2 = delxjk * delxjk + delyjk * delyjk + delzjk * delzjk; + if (rjk2 > rbound) continue; + + delxik = xktmp - xitmp; + delyik = yktmp - yitmp; + delzik = zktmp - zitmp; + rik2 = delxik * delxik + delyik * delyik + delzik * delzik; + if (rik2 > rbound) continue; + + xik = rik2 / rij2; + xjk = rjk2 / rij2; + a = 1 - (xik - xjk) * (xik - xjk); + // if a < 0, then ellipse equation doesn't describe this case and + // atom k can't possibly screen i-j + if (a <= 0.0) continue; + + cikj = (2.0 * (xik + xjk) + a - 2.0) / a; + Cmax = this->Cmax_meam[elti][eltj][eltk]; + Cmin = this->Cmin_meam[elti][eltj][eltk]; + if (cikj >= Cmax) { + continue; + // Note that cikj may be slightly negative (within numerical + // tolerance) if atoms are colinear, so don't reject that case + // here + // (other negative cikj cases were handled by the test on "a" + // above) + // Note that we never have 0 +template +KOKKOS_INLINE_FUNCTION +void +MEAMKokkos::calc_rho1(int i, int /*ntype*/, typename AT::t_int_1d_randomread type, typename AT::t_int_1d_randomread fmap, typename AT::t_x_array_randomread x, typename AT::t_int_1d_randomread numneigh, + int offset) const +{ + int jn, j, m, n, p, elti, eltj; + int nv2, nv3; + X_FLOAT xtmp, ytmp, ztmp, delij[3]; + F_FLOAT rij2, rij, sij; + F_FLOAT ai, aj, rhoa0j, rhoa1j, rhoa2j, rhoa3j, A1j, A2j, A3j; + // double G,Gbar,gam,shp[3+1]; + F_FLOAT ro0i, ro0j; + F_FLOAT rhoa0i, rhoa1i, rhoa2i, rhoa3i, A1i, A2i, A3i; + + // Likely to do: replace with duplicated view for OpenMP, atomic view for GPU abstraction + Kokkos::View::value> > a_rho0 = d_rho0; + Kokkos::View::value> > a_arho2b = d_arho2b; + Kokkos::View::value> > a_t_ave = d_t_ave; + Kokkos::View::value> > a_tsq_ave = d_tsq_ave; + Kokkos::View::value> > a_arho1 = d_arho1; + Kokkos::View::value> > a_arho2 = d_arho2; + Kokkos::View::value> > a_arho3 = d_arho3; + Kokkos::View::value> > a_arho3b = d_arho3b; + + elti = fmap[type[i]]; + xtmp = x(i,0); + ytmp = x(i,1); + ztmp = x(i,2); + for (jn = 0; jn < numneigh[i]; jn++) { + if (!iszero_kk(d_scrfcn[offset+jn])) { + //j = firstneigh[jn]; + j = d_neighbors_half(i,jn); + sij = d_scrfcn[offset+jn] * d_fcpair[offset+jn]; + delij[0] = x(j,0) - xtmp; + delij[1] = x(j,1) - ytmp; + delij[2] = x(j,2) - ztmp; + rij2 = delij[0] * delij[0] + delij[1] * delij[1] + delij[2] * delij[2]; + if (rij2 < this->cutforcesq) { + eltj = fmap[type[j]]; + rij = sqrt(rij2); + ai = rij / this->re_meam[elti][elti] - 1.0; + aj = rij / this->re_meam[eltj][eltj] - 1.0; + ro0i = this->rho0_meam[elti]; + ro0j = this->rho0_meam[eltj]; + rhoa0j = ro0j * MathSpecialKokkos::fm_exp(-this->beta0_meam[eltj] * aj) * sij; + rhoa1j = ro0j * MathSpecialKokkos::fm_exp(-this->beta1_meam[eltj] * aj) * sij; + rhoa2j = ro0j * MathSpecialKokkos::fm_exp(-this->beta2_meam[eltj] * aj) * sij; + rhoa3j = ro0j * MathSpecialKokkos::fm_exp(-this->beta3_meam[eltj] * aj) * sij; + rhoa0i = ro0i * MathSpecialKokkos::fm_exp(-this->beta0_meam[elti] * ai) * sij; + rhoa1i = ro0i * MathSpecialKokkos::fm_exp(-this->beta1_meam[elti] * ai) * sij; + rhoa2i = ro0i * MathSpecialKokkos::fm_exp(-this->beta2_meam[elti] * ai) * sij; + rhoa3i = ro0i * MathSpecialKokkos::fm_exp(-this->beta3_meam[elti] * ai) * sij; + if (this->ialloy == 1) { + rhoa1j = rhoa1j * this->t1_meam[eltj]; + rhoa2j = rhoa2j * this->t2_meam[eltj]; + rhoa3j = rhoa3j * this->t3_meam[eltj]; + rhoa1i = rhoa1i * this->t1_meam[elti]; + rhoa2i = rhoa2i * this->t2_meam[elti]; + rhoa3i = rhoa3i * this->t3_meam[elti]; + } + a_rho0[i] += rhoa0j; + a_rho0[j] += rhoa0i; + // For ialloy = 2, use single-element value (not average) + if (this->ialloy != 2) { + a_t_ave(i,0) = a_t_ave(i,0) + this->t1_meam[eltj] * rhoa0j; + a_t_ave(i,1) = a_t_ave(i,1) + this->t2_meam[eltj] * rhoa0j; + a_t_ave(i,2) = a_t_ave(i,2) + this->t3_meam[eltj] * rhoa0j; + a_t_ave(j,0) = a_t_ave(j,0) + this->t1_meam[elti] * rhoa0i; + a_t_ave(j,1) = a_t_ave(j,1) + this->t2_meam[elti] * rhoa0i; + a_t_ave(j,2) = a_t_ave(j,2) + this->t3_meam[elti] * rhoa0i; + } + if (this->ialloy == 1) { + a_tsq_ave(i,0) = a_tsq_ave(i,0) + this->t1_meam[eltj] * this->t1_meam[eltj] * rhoa0j; + a_tsq_ave(i,1) = a_tsq_ave(i,1) + this->t2_meam[eltj] * this->t2_meam[eltj] * rhoa0j; + a_tsq_ave(i,2) = a_tsq_ave(i,2) + this->t3_meam[eltj] * this->t3_meam[eltj] * rhoa0j; + a_tsq_ave(j,0) = a_tsq_ave(j,0) + this->t1_meam[elti] * this->t1_meam[elti] * rhoa0i; + a_tsq_ave(j,1) = a_tsq_ave(j,1) + this->t2_meam[elti] * this->t2_meam[elti] * rhoa0i; + a_tsq_ave(j,2) = a_tsq_ave(j,2) + this->t3_meam[elti] * this->t3_meam[elti] * rhoa0i; + } + a_arho2b[i] += rhoa2j; + a_arho2b[j] += rhoa2i; + + A1j = rhoa1j / rij; + A2j = rhoa2j / rij2; + A3j = rhoa3j / (rij2 * rij); + A1i = rhoa1i / rij; + A2i = rhoa2i / rij2; + A3i = rhoa3i / (rij2 * rij); + nv2 = 0; + nv3 = 0; + for (m = 0; m < 3; m++) { + a_arho1(i,m) += A1j * delij[m]; + a_arho1(j,m) += (-A1i * delij[m]); + a_arho3b(i,m) += rhoa3j * delij[m] / rij; + a_arho3b(j,m) += (- rhoa3i * delij[m] / rij); + for (n = m; n < 3; n++) { + a_arho2(i,nv2) += A2j * delij[m] * delij[n]; + a_arho2(j,nv2) += A2i * delij[m] * delij[n]; + nv2 = nv2 + 1; + for (p = n; p < 3; p++) { + a_arho3(i,nv3) += A3j * delij[m] * delij[n] * delij[p]; + a_arho3(j,nv3) += (- A3i * delij[m] * delij[n] * delij[p]); + nv3 = nv3 + 1; + } + } + } + } + } + } +} + +//Cutoff function and derivative + +template +KOKKOS_INLINE_FUNCTION +double MEAMKokkos::dfcut(const double xi, double& dfc) const { + double a, a3, a4, a1m4; + if (xi >= 1.0) { + dfc = 0.0; + return 1.0; + } else if (xi <= 0.0) { + dfc = 0.0; + return 0.0; + } else { + a = 1.0 - xi; + a3 = a * a * a; + a4 = a * a3; + a1m4 = 1.0-a4; + + dfc = 8 * a1m4 * a3; + return a1m4*a1m4; + } + } + + //----------------------------------------------------------------------------- + // // Derivative of Cikj w.r.t. rij + // // Inputs: rij,rij2,rik2,rjk2 + // // + // + +template +KOKKOS_INLINE_FUNCTION +double MEAMKokkos::dCfunc(const double rij2, const double rik2, const double rjk2) const +{ + double rij4, a, asq, b,denom; + + rij4 = rij2 * rij2; + a = rik2 - rjk2; + b = rik2 + rjk2; + asq = a*a; + denom = rij4 - asq; + denom = denom * denom; + return -4 * (-2 * rij2 * asq + rij4 * b + asq * b) / denom; +} + +template +KOKKOS_INLINE_FUNCTION +void MEAMKokkos::dCfunc2(const double rij2, const double rik2, const double rjk2, double& dCikj1, double& dCikj2) const +{ + double rij4, rik4, rjk4, a, denom; + + rij4 = rij2 * rij2; + rik4 = rik2 * rik2; + rjk4 = rjk2 * rjk2; + a = rik2 - rjk2; + denom = rij4 - a * a; + denom = denom * denom; + dCikj1 = 4 * rij2 * (rij4 + rik4 + 2 * rik2 * rjk2 - 3 * rjk4 - 2 * rij2 * a) / denom; + dCikj2 = 4 * rij2 * (rij4 - 3 * rik4 + 2 * rik2 * rjk2 + rjk4 + 2 * rij2 * a) / denom; + +} +template +KOKKOS_INLINE_FUNCTION +double MEAMKokkos::fcut(const double xi) const{ + double a; + if (xi >= 1.0) + return 1.0; + else if (xi <= 0.0) + return 0.0; + else { + a = 1.0 - xi; + a *= a; a *= a; + a = 1.0 - a; + return a * a; + } + } + diff --git a/src/KOKKOS/meam_force_kokkos.h b/src/KOKKOS/meam_force_kokkos.h new file mode 100644 index 0000000000..4e061c9043 --- /dev/null +++ b/src/KOKKOS/meam_force_kokkos.h @@ -0,0 +1,554 @@ +#include "meam_kokkos.h" +#include "math_special_kokkos.h" +#include + +using namespace LAMMPS_NS; +using namespace MathSpecialKokkos; + + +template +void +MEAMKokkos::meam_force(int inum_half, int eflag_either, int eflag_global, int eflag_atom, int vflag_atom, double* eng_vdwl, + typename ArrayTypes::t_efloat_1d eatom, int ntype, typename AT::t_int_1d_randomread type, typename AT::t_int_1d_randomread fmap, typename AT::t_x_array_randomread x, typename AT::t_int_1d_randomread numneigh, + typename AT::t_int_1d_randomread numneigh_full, typename AT::t_f_array f, typename ArrayTypes::t_virial_array vatom, typename AT::t_int_1d_randomread d_ilist_half, typename AT::t_int_1d_randomread d_offset, typename AT::t_neighbors_2d d_neighbors_half, typename AT::t_neighbors_2d d_neighbors_full, int neighflag) +{ + EV_FLOAT ev; + ev.evdwl = *eng_vdwl; + + this->eflag_either = eflag_either; + this->eflag_global = eflag_global; + this->eflag_atom = eflag_atom; + this->vflag_atom = vflag_atom; + this->d_eatom = eatom; + this->ntype = ntype; + this->type = type; + this->fmap = fmap; + this->x = x; + this->d_numneigh_half = numneigh; + this->d_numneigh_full = numneigh_full; + this->d_neighbors_half = d_neighbors_half; + this->d_neighbors_full = d_neighbors_full; + this->f = f; + this->d_vatom = vatom; + this->d_ilist_half = d_ilist_half; + this->d_offset = d_offset; + + if (neighflag == FULL) + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum_half),*this,ev); + else if (neighflag == HALF) + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum_half),*this,ev); + else if (neighflag == HALFTHREAD) + Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum_half),*this,ev); + *eng_vdwl = ev.evdwl; +} + +template +template +KOKKOS_INLINE_FUNCTION +void +MEAMKokkos::operator()(TagMEAMforce, const int &ii, EV_FLOAT& ev) const { + int i, j, jn, k, kn, kk, m, n, p, q; + int nv2, nv3, elti, eltj, eltk, ind; + X_FLOAT xitmp, yitmp, zitmp, delij[3]; + F_FLOAT rij2, rij, rij3; + F_FLOAT v[6], fi[3], fj[3]; + F_FLOAT third, sixth; + F_FLOAT pp, dUdrij, dUdsij, dUdrijm[3], force, forcem; + F_FLOAT r, recip, phi, phip; + F_FLOAT sij; + F_FLOAT a1, a1i, a1j, a2, a2i, a2j; + F_FLOAT a3i, a3j; + F_FLOAT shpi[3], shpj[3]; + F_FLOAT ai, aj, ro0i, ro0j, invrei, invrej; + F_FLOAT rhoa0j, drhoa0j, rhoa0i, drhoa0i; + F_FLOAT rhoa1j, drhoa1j, rhoa1i, drhoa1i; + F_FLOAT rhoa2j, drhoa2j, rhoa2i, drhoa2i; + F_FLOAT a3, a3a, rhoa3j, drhoa3j, rhoa3i, drhoa3i; + F_FLOAT drho0dr1, drho0dr2, drho0ds1, drho0ds2; + F_FLOAT drho1dr1, drho1dr2, drho1ds1, drho1ds2; + F_FLOAT drho1drm1[3], drho1drm2[3]; + F_FLOAT drho2dr1, drho2dr2, drho2ds1, drho2ds2; + F_FLOAT drho2drm1[3], drho2drm2[3]; + F_FLOAT drho3dr1, drho3dr2, drho3ds1, drho3ds2; + F_FLOAT drho3drm1[3], drho3drm2[3]; + F_FLOAT dt1dr1, dt1dr2, dt1ds1, dt1ds2; + F_FLOAT dt2dr1, dt2dr2, dt2ds1, dt2ds2; + F_FLOAT dt3dr1, dt3dr2, dt3ds1, dt3ds2; + F_FLOAT drhodr1, drhodr2, drhods1, drhods2, drhodrm1[3], drhodrm2[3]; + F_FLOAT arg; + F_FLOAT arg1i1, arg1j1, arg1i2, arg1j2, arg1i3, arg1j3, arg3i3, arg3j3; + F_FLOAT dsij1, dsij2, force1, force2; + F_FLOAT t1i, t2i, t3i, t1j, t2j, t3j; + int fnoffset; + + // To do: update with duplication for OpenMP, atomic for GPUs + Kokkos::View::value> > a_eatom = d_eatom; + Kokkos::View::value> > a_vatom = d_vatom; + Kokkos::View::value> > a_f = f; + + + i = d_ilist_half[ii]; + fnoffset = d_offset[i]; + third = 1.0 / 3.0; + sixth = 1.0 / 6.0; + // Compute forces atom i + + elti = fmap[type[i]]; + if (elti < 0) return; + + xitmp = x(i,0); + yitmp = x(i,1); + zitmp = x(i,2); + + // Treat each pair + for (jn = 0; jn < d_numneigh_half[i]; jn++) { + //j = firstneigh[jn]; + j = d_neighbors_half(i,jn); + eltj = fmap[type[j]]; + + if (!iszero_kk(d_scrfcn[fnoffset + jn]) && eltj >= 0) { + + sij = d_scrfcn[fnoffset + jn] * d_fcpair[fnoffset + jn]; + delij[0] = x(j,0) - xitmp; + delij[1] = x(j,1) - yitmp; + delij[2] = x(j,2) - zitmp; + rij2 = delij[0] * delij[0] + delij[1] * delij[1] + delij[2] * delij[2]; + if (rij2 < this->cutforcesq) { + rij = sqrt(rij2); + r = rij; + + // Compute phi and phip + ind = this->eltind[elti][eltj]; + pp = rij * this->rdrar; + kk = (int)pp; + kk = (kk <= (this->nrar - 2))?kk:this->nrar - 2; + pp = pp - kk; + pp = (pp <= 1.0)?pp:1.0; + phi = ((this->d_phirar3(ind,kk) * pp + this->d_phirar2(ind,kk)) * pp + this->d_phirar1(ind,kk)) * pp + + this->d_phirar(ind,kk); + phip = (this->d_phirar6(ind,kk) * pp + this->d_phirar5(ind,kk)) * pp + this->d_phirar4(ind,kk); + recip = 1.0 / r; + + if (eflag_either != 0) { + if (eflag_global != 0) + //*eng_vdwl = *eng_vdwl + phi * sij; + ev.evdwl = ev.evdwl + phi * sij; + if (eflag_atom != 0) { + a_eatom[i] += (0.5 * phi * sij); + a_eatom[j] += (0.5 * phi * sij); + } + } + + // write(1,*) "force_meamf: phi: ",phi + // write(1,*) "force_meamf: phip: ",phip + + // Compute pair densities and derivatives + invrei = 1.0 / this->re_meam[elti][elti]; + ai = rij * invrei - 1.0; + ro0i = this->rho0_meam[elti]; + rhoa0i = ro0i * MathSpecialKokkos::fm_exp(-this->beta0_meam[elti] * ai); + drhoa0i = -this->beta0_meam[elti] * invrei * rhoa0i; + rhoa1i = ro0i * MathSpecialKokkos::fm_exp(-this->beta1_meam[elti] * ai); + drhoa1i = -this->beta1_meam[elti] * invrei * rhoa1i; + rhoa2i = ro0i * MathSpecialKokkos::fm_exp(-this->beta2_meam[elti] * ai); + drhoa2i = -this->beta2_meam[elti] * invrei * rhoa2i; + rhoa3i = ro0i * MathSpecialKokkos::fm_exp(-this->beta3_meam[elti] * ai); + drhoa3i = -this->beta3_meam[elti] * invrei * rhoa3i; + + if (elti != eltj) { + invrej = 1.0 / this->re_meam[eltj][eltj]; + aj = rij * invrej - 1.0; + ro0j = this->rho0_meam[eltj]; + rhoa0j = ro0j * MathSpecialKokkos::fm_exp(-this->beta0_meam[eltj] * aj); + drhoa0j = -this->beta0_meam[eltj] * invrej * rhoa0j; + rhoa1j = ro0j * MathSpecialKokkos::fm_exp(-this->beta1_meam[eltj] * aj); + drhoa1j = -this->beta1_meam[eltj] * invrej * rhoa1j; + rhoa2j = ro0j * MathSpecialKokkos::fm_exp(-this->beta2_meam[eltj] * aj); + drhoa2j = -this->beta2_meam[eltj] * invrej * rhoa2j; + rhoa3j = ro0j * MathSpecialKokkos::fm_exp(-this->beta3_meam[eltj] * aj); + drhoa3j = -this->beta3_meam[eltj] * invrej * rhoa3j; + } else { + rhoa0j = rhoa0i; + drhoa0j = drhoa0i; + rhoa1j = rhoa1i; + drhoa1j = drhoa1i; + rhoa2j = rhoa2i; + drhoa2j = drhoa2i; + rhoa3j = rhoa3i; + drhoa3j = drhoa3i; + } + + const double t1mi = this->t1_meam[elti]; + const double t2mi = this->t2_meam[elti]; + const double t3mi = this->t3_meam[elti]; + const double t1mj = this->t1_meam[eltj]; + const double t2mj = this->t2_meam[eltj]; + const double t3mj = this->t3_meam[eltj]; + + if (this->ialloy == 1) { + rhoa1j *= t1mj; + rhoa2j *= t2mj; + rhoa3j *= t3mj; + rhoa1i *= t1mi; + rhoa2i *= t2mi; + rhoa3i *= t3mi; + drhoa1j *= t1mj; + drhoa2j *= t2mj; + drhoa3j *= t3mj; + drhoa1i *= t1mi; + drhoa2i *= t2mi; + drhoa3i *= t3mi; + } + + nv2 = 0; + nv3 = 0; + arg1i1 = 0.0; + arg1j1 = 0.0; + arg1i2 = 0.0; + arg1j2 = 0.0; + arg1i3 = 0.0; + arg1j3 = 0.0; + arg3i3 = 0.0; + arg3j3 = 0.0; + for (n = 0; n < 3; n++) { + for (p = n; p < 3; p++) { + for (q = p; q < 3; q++) { + arg = delij[n] * delij[p] * delij[q] * this->v3D[nv3]; + arg1i3 = arg1i3 + d_arho3(i,nv3) * arg; + arg1j3 = arg1j3 - d_arho3(j,nv3) * arg; + nv3 = nv3 + 1; + } + arg = delij[n] * delij[p] * this->v2D[nv2]; + arg1i2 = arg1i2 + d_arho2(i,nv2) * arg; + arg1j2 = arg1j2 + d_arho2(j,nv2) * arg; + nv2 = nv2 + 1; + } + arg1i1 = arg1i1 + d_arho1(i,n) * delij[n]; + arg1j1 = arg1j1 - d_arho1(j,n) * delij[n]; + arg3i3 = arg3i3 + d_arho3b(i,n) * delij[n]; + arg3j3 = arg3j3 - d_arho3b(j,n) * delij[n]; + } + + // rho0 terms + drho0dr1 = drhoa0j * sij; + drho0dr2 = drhoa0i * sij; + + // rho1 terms + a1 = 2 * sij / rij; + drho1dr1 = a1 * (drhoa1j - rhoa1j / rij) * arg1i1; + drho1dr2 = a1 * (drhoa1i - rhoa1i / rij) * arg1j1; + a1 = 2.0 * sij / rij; + for (m = 0; m < 3; m++) { + drho1drm1[m] = a1 * rhoa1j * d_arho1(i,m); + drho1drm2[m] = -a1 * rhoa1i * d_arho1(j,m); + } + + // rho2 terms + a2 = 2 * sij / rij2; + drho2dr1 = a2 * (drhoa2j - 2 * rhoa2j / rij) * arg1i2 - 2.0 / 3.0 * d_arho2b[i] * drhoa2j * sij; + drho2dr2 = a2 * (drhoa2i - 2 * rhoa2i / rij) * arg1j2 - 2.0 / 3.0 * d_arho2b[j] * drhoa2i * sij; + a2 = 4 * sij / rij2; + for (m = 0; m < 3; m++) { + drho2drm1[m] = 0.0; + drho2drm2[m] = 0.0; + for (n = 0; n < 3; n++) { + drho2drm1[m] = drho2drm1[m] + d_arho2(i,this->vind2D[m][n]) * delij[n]; + drho2drm2[m] = drho2drm2[m] - d_arho2(j,this->vind2D[m][n]) * delij[n]; + } + drho2drm1[m] = a2 * rhoa2j * drho2drm1[m]; + drho2drm2[m] = -a2 * rhoa2i * drho2drm2[m]; + } + + // rho3 terms + rij3 = rij * rij2; + a3 = 2 * sij / rij3; + a3a = 6.0 / 5.0 * sij / rij; + drho3dr1 = a3 * (drhoa3j - 3 * rhoa3j / rij) * arg1i3 - a3a * (drhoa3j - rhoa3j / rij) * arg3i3; + drho3dr2 = a3 * (drhoa3i - 3 * rhoa3i / rij) * arg1j3 - a3a * (drhoa3i - rhoa3i / rij) * arg3j3; + a3 = 6 * sij / rij3; + a3a = 6 * sij / (5 * rij); + for (m = 0; m < 3; m++) { + drho3drm1[m] = 0.0; + drho3drm2[m] = 0.0; + nv2 = 0; + for (n = 0; n < 3; n++) { + for (p = n; p < 3; p++) { + arg = delij[n] * delij[p] * this->v2D[nv2]; + drho3drm1[m] = drho3drm1[m] + d_arho3(i,this->vind3D[m][n][p]) * arg; + drho3drm2[m] = drho3drm2[m] + d_arho3(j,this->vind3D[m][n][p]) * arg; + nv2 = nv2 + 1; + } + } + drho3drm1[m] = (a3 * drho3drm1[m] - a3a * d_arho3b(i,m)) * rhoa3j; + drho3drm2[m] = (-a3 * drho3drm2[m] + a3a * d_arho3b(j,m)) * rhoa3i; + } + + // Compute derivatives of weighting functions t wrt rij + t1i = d_t_ave(i,0); + t2i = d_t_ave(i,1); + t3i = d_t_ave(i,2); + t1j = d_t_ave(j,0); + t2j = d_t_ave(j,1); + t3j = d_t_ave(j,2); + + if (this->ialloy == 1) { + + a1i = fdiv_zero_kk(drhoa0j * sij, d_tsq_ave(i,0)); + a1j = fdiv_zero_kk(drhoa0i * sij, d_tsq_ave(j,0)); + a2i = fdiv_zero_kk(drhoa0j * sij, d_tsq_ave(i,1)); + a2j = fdiv_zero_kk(drhoa0i * sij, d_tsq_ave(j,1)); + a3i = fdiv_zero_kk(drhoa0j * sij, d_tsq_ave(i,2)); + a3j = fdiv_zero_kk(drhoa0i * sij, d_tsq_ave(j,2)); + + dt1dr1 = a1i * (t1mj - t1i * MathSpecialKokkos::square(t1mj)); + dt1dr2 = a1j * (t1mi - t1j * MathSpecialKokkos::square(t1mi)); + dt2dr1 = a2i * (t2mj - t2i * MathSpecialKokkos::square(t2mj)); + dt2dr2 = a2j * (t2mi - t2j * MathSpecialKokkos::square(t2mi)); + dt3dr1 = a3i * (t3mj - t3i * MathSpecialKokkos::square(t3mj)); + dt3dr2 = a3j * (t3mi - t3j * MathSpecialKokkos::square(t3mi)); + + } else if (this->ialloy == 2) { + + dt1dr1 = 0.0; + dt1dr2 = 0.0; + dt2dr1 = 0.0; + dt2dr2 = 0.0; + dt3dr1 = 0.0; + dt3dr2 = 0.0; + + } else { + + ai = 0.0; + if (!iszero_kk(d_rho0[i])) + ai = drhoa0j * sij / d_rho0[i]; + aj = 0.0; + if (!iszero_kk(d_rho0[j])) + aj = drhoa0i * sij / d_rho0[j]; + + dt1dr1 = ai * (t1mj - t1i); + dt1dr2 = aj * (t1mi - t1j); + dt2dr1 = ai * (t2mj - t2i); + dt2dr2 = aj * (t2mi - t2j); + dt3dr1 = ai * (t3mj - t3i); + dt3dr2 = aj * (t3mi - t3j); + } + + // Compute derivatives of total density wrt rij, sij and rij(3) + get_shpfcn(this->lattce_meam[elti][elti], shpi); + get_shpfcn(this->lattce_meam[eltj][eltj], shpj); + drhodr1 = dgamma1[i] * drho0dr1 + + dgamma2[i] * (dt1dr1 * d_rho1[i] + t1i * drho1dr1 + dt2dr1 * d_rho2[i] + t2i * drho2dr1 + + dt3dr1 * d_rho3[i] + t3i * drho3dr1) - + dgamma3[i] * (shpi[0] * dt1dr1 + shpi[1] * dt2dr1 + shpi[2] * dt3dr1); + drhodr2 = dgamma1[j] * drho0dr2 + + dgamma2[j] * (dt1dr2 * d_rho1[j] + t1j * drho1dr2 + dt2dr2 * d_rho2[j] + t2j * drho2dr2 + + dt3dr2 * d_rho3[j] + t3j * drho3dr2) - + dgamma3[j] * (shpj[0] * dt1dr2 + shpj[1] * dt2dr2 + shpj[2] * dt3dr2); + for (m = 0; m < 3; m++) { + drhodrm1[m] = 0.0; + drhodrm2[m] = 0.0; + drhodrm1[m] = dgamma2[i] * (t1i * drho1drm1[m] + t2i * drho2drm1[m] + t3i * drho3drm1[m]); + drhodrm2[m] = dgamma2[j] * (t1j * drho1drm2[m] + t2j * drho2drm2[m] + t3j * drho3drm2[m]); + } + + // Compute derivatives wrt sij, but only if necessary + if (!iszero_kk(d_dscrfcn[fnoffset + jn])) { + drho0ds1 = rhoa0j; + drho0ds2 = rhoa0i; + a1 = 2.0 / rij; + drho1ds1 = a1 * rhoa1j * arg1i1; + drho1ds2 = a1 * rhoa1i * arg1j1; + a2 = 2.0 / rij2; + drho2ds1 = a2 * rhoa2j * arg1i2 - 2.0 / 3.0 * d_arho2b[i] * rhoa2j; + drho2ds2 = a2 * rhoa2i * arg1j2 - 2.0 / 3.0 * d_arho2b[j] * rhoa2i; + a3 = 2.0 / rij3; + a3a = 6.0 / (5.0 * rij); + drho3ds1 = a3 * rhoa3j * arg1i3 - a3a * rhoa3j * arg3i3; + drho3ds2 = a3 * rhoa3i * arg1j3 - a3a * rhoa3i * arg3j3; + + if (this->ialloy == 1) { + a1i = fdiv_zero_kk(rhoa0j, d_tsq_ave(i,0)); + a1j = fdiv_zero_kk(rhoa0i, d_tsq_ave(j,0)); + a2i = fdiv_zero_kk(rhoa0j, d_tsq_ave(i,1)); + a2j = fdiv_zero_kk(rhoa0i, d_tsq_ave(j,1)); + a3i = fdiv_zero_kk(rhoa0j, d_tsq_ave(i,2)); + a3j = fdiv_zero_kk(rhoa0i, d_tsq_ave(j,2)); + + dt1ds1 = a1i * (t1mj - t1i * MathSpecialKokkos::square(t1mj)); + dt1ds2 = a1j * (t1mi - t1j * MathSpecialKokkos::square(t1mi)); + dt2ds1 = a2i * (t2mj - t2i * MathSpecialKokkos::square(t2mj)); + dt2ds2 = a2j * (t2mi - t2j * MathSpecialKokkos::square(t2mi)); + dt3ds1 = a3i * (t3mj - t3i * MathSpecialKokkos::square(t3mj)); + dt3ds2 = a3j * (t3mi - t3j * MathSpecialKokkos::square(t3mi)); + + } else if (this->ialloy == 2) { + + dt1ds1 = 0.0; + dt1ds2 = 0.0; + dt2ds1 = 0.0; + dt2ds2 = 0.0; + dt3ds1 = 0.0; + dt3ds2 = 0.0; + + } else { + + ai = 0.0; + if (!iszero_kk(d_rho0[i])) + ai = rhoa0j / d_rho0[i]; + aj = 0.0; + if (!iszero_kk(d_rho0[j])) + aj = rhoa0i / d_rho0[j]; + + dt1ds1 = ai * (t1mj - t1i); + dt1ds2 = aj * (t1mi - t1j); + dt2ds1 = ai * (t2mj - t2i); + dt2ds2 = aj * (t2mi - t2j); + dt3ds1 = ai * (t3mj - t3i); + dt3ds2 = aj * (t3mi - t3j); + } + + drhods1 = d_dgamma1[i] * drho0ds1 + + d_dgamma2[i] * (dt1ds1 * d_rho1[i] + t1i * drho1ds1 + dt2ds1 * d_rho2[i] + t2i * drho2ds1 + + dt3ds1 * d_rho3[i] + t3i * drho3ds1) - + d_dgamma3[i] * (shpi[0] * dt1ds1 + shpi[1] * dt2ds1 + shpi[2] * dt3ds1); + drhods2 = d_dgamma1[j] * drho0ds2 + + d_dgamma2[j] * (dt1ds2 * d_rho1[j] + t1j * drho1ds2 + dt2ds2 * d_rho2[j] + t2j * drho2ds2 + + dt3ds2 * d_rho3[j] + t3j * drho3ds2) - + d_dgamma3[j] * (shpj[0] * dt1ds2 + shpj[1] * dt2ds2 + shpj[2] * dt3ds2); + } + + // Compute derivatives of energy wrt rij, sij and rij[3] + dUdrij = phip * sij + d_frhop[i] * drhodr1 + d_frhop[j] * drhodr2; + dUdsij = 0.0; + if (!iszero_kk(d_dscrfcn[fnoffset + jn])) { + dUdsij = phi + d_frhop[i] * drhods1 + d_frhop[j] * drhods2; + } + for (m = 0; m < 3; m++) { + dUdrijm[m] = d_frhop[i] * drhodrm1[m] + d_frhop[j] * drhodrm2[m]; + } + + // Add the part of the force due to dUdrij and dUdsij + force = dUdrij * recip + dUdsij * d_dscrfcn[fnoffset + jn]; + for (m = 0; m < 3; m++) { + forcem = delij[m] * force + dUdrijm[m]; + a_f(i,m) += forcem; + a_f(j,m) -= -forcem; + } + + // Tabulate per-atom virial as symmetrized stress tensor + + if (vflag_atom != 0) { + fi[0] = delij[0] * force + dUdrijm[0]; + fi[1] = delij[1] * force + dUdrijm[1]; + fi[2] = delij[2] * force + dUdrijm[2]; + v[0] = -0.5 * (delij[0] * fi[0]); + v[1] = -0.5 * (delij[1] * fi[1]); + v[2] = -0.5 * (delij[2] * fi[2]); + v[3] = -0.25 * (delij[0] * fi[1] + delij[1] * fi[0]); + v[4] = -0.25 * (delij[0] * fi[2] + delij[2] * fi[0]); + v[5] = -0.25 * (delij[1] * fi[2] + delij[2] * fi[1]); + + for (m = 0; m < 6; m++) { + a_vatom(i,m) += v[m]; + a_vatom(j,m) += v[m]; + } + } + + // Now compute forces on other atoms k due to change in sij + + if (iszero_kk(sij) || iszero_kk(sij - 1.0)) continue; //: cont jn loop + + double dxik(0), dyik(0), dzik(0); + double dxjk(0), dyjk(0), dzjk(0); + + for (kn = 0; kn < d_numneigh_full[i]; kn++) { + //k = firstneigh_full[kn]; + k = d_neighbors_full(i,kn); + eltk = fmap[type[k]]; + if (k != j && eltk >= 0) { + double xik, xjk, cikj, sikj, dfc, a; + double dCikj1, dCikj2; + double delc, rik2, rjk2; + + sij = d_scrfcn[jn+fnoffset] * d_fcpair[jn+fnoffset]; + const double Cmax = this->Cmax_meam[elti][eltj][eltk]; + const double Cmin = this->Cmin_meam[elti][eltj][eltk]; + + dsij1 = 0.0; + dsij2 = 0.0; + if (!iszero_kk(sij) && !iszero_kk(sij - 1.0)) { + const double rbound = rij2 * this->ebound_meam[elti][eltj]; + delc = Cmax - Cmin; + dxjk = x(k,0) - x(j,0); + dyjk = x(k,1) - x(j,1); + dzjk = x(k,2) - x(j,2); + rjk2 = dxjk * dxjk + dyjk * dyjk + dzjk * dzjk; + if (rjk2 <= rbound) { + dxik = x(k,0) - x(i,0); + dyik = x(k,1) - x(i,1); + dzik = x(k,2) - x(i,2); + rik2 = dxik * dxik + dyik * dyik + dzik * dzik; + if (rik2 <= rbound) { + xik = rik2 / rij2; + xjk = rjk2 / rij2; + a = 1 - (xik - xjk) * (xik - xjk); + if (!iszero_kk(a)) { + cikj = (2.0 * (xik + xjk) + a - 2.0) / a; + if (cikj >= Cmin && cikj <= Cmax) { + cikj = (cikj - Cmin) / delc; + sikj = dfcut(cikj, dfc); + dCfunc2(rij2, rik2, rjk2, dCikj1, dCikj2); + a = sij / delc * dfc / sikj; + dsij1 = a * dCikj1; + dsij2 = a * dCikj2; + } + } + } + } + } + + if (!iszero_kk(dsij1) || !iszero_kk(dsij2)) { + force1 = dUdsij * dsij1; + force2 = dUdsij * dsij2; + + a_f(i,0) += force1 * dxik; + a_f(i,1) += force1 * dyik; + a_f(i,2) += force1 * dzik; + a_f(j,0) += force2 * dxjk; + a_f(j,1) += force2 * dyjk; + a_f(j,2) += force2 * dzjk; + a_f(k,0) -= force1 * dxik + force2 * dxjk; + a_f(k,1) -= force1 * dyik + force2 * dyjk; + a_f(k,2) -= force1 * dzik + force2 * dzjk; + + // Tabulate per-atom virial as symmetrized stress tensor + + if (vflag_atom != 0) { + fi[0] = force1 * dxik; + fi[1] = force1 * dyik; + fi[2] = force1 * dzik; + fj[0] = force2 * dxjk; + fj[1] = force2 * dyjk; + fj[2] = force2 * dzjk; + v[0] = -third * (dxik * fi[0] + dxjk * fj[0]); + v[1] = -third * (dyik * fi[1] + dyjk * fj[1]); + v[2] = -third * (dzik * fi[2] + dzjk * fj[2]); + v[3] = -sixth * (dxik * fi[1] + dxjk * fj[1] + dyik * fi[0] + dyjk * fj[0]); + v[4] = -sixth * (dxik * fi[2] + dxjk * fj[2] + dzik * fi[0] + dzjk * fj[0]); + v[5] = -sixth * (dyik * fi[2] + dyjk * fj[2] + dzik * fi[1] + dzjk * fj[1]); + + for (m = 0; m < 6; m++) { + a_vatom(i,m) += v[m]; + a_vatom(j,m) += v[m]; + a_vatom(k,m) += v[m]; + } + } + } + } + // end of k loop + } + } + } + // end of j loop + } +} diff --git a/src/KOKKOS/meam_funcs_kokkos.h b/src/KOKKOS/meam_funcs_kokkos.h new file mode 100644 index 0000000000..9e41be9ea8 --- /dev/null +++ b/src/KOKKOS/meam_funcs_kokkos.h @@ -0,0 +1,326 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Sebastian Hütter (OvGU) +------------------------------------------------------------------------- */ + +#include "math_special_kokkos.h" +#include +#include "meam_kokkos.h" +using namespace MathSpecialKokkos; + +//----------------------------------------------------------------------------- +// Compute G(gamma) based on selection flag ibar: +// 0 => G = sqrt(1+gamma) +// 1 => G = exp(gamma/2) +// 2 => not implemented +// 3 => G = 2/(1+exp(-gamma)) +// 4 => G = sqrt(1+gamma) +// -5 => G = +-sqrt(abs(1+gamma)) +// +template +KOKKOS_INLINE_FUNCTION +double MEAMKokkos::G_gam(const double gamma, const int ibar, int &errorflag) const +{ + double gsmooth_switchpoint; + + switch (ibar) { + case 0: + case 4: + gsmooth_switchpoint = -gsmooth_factor / (gsmooth_factor + 1); + if (gamma < gsmooth_switchpoint) { + // e.g. gsmooth_factor is 99, {: + // gsmooth_switchpoint = -0.99 + // G = 0.01*(-0.99/gamma)**99 + double G = 1 / (gsmooth_factor + 1) * pow((gsmooth_switchpoint / gamma), gsmooth_factor); + return sqrt(G); + } else { + return sqrt(1.0 + gamma); + } + case 1: + return MathSpecialKokkos::fm_exp(gamma / 2.0); + case 3: + return 2.0 / (1.0 + exp(-gamma)); + case -5: + if ((1.0 + gamma) >= 0) { + return sqrt(1.0 + gamma); + } else { + return -sqrt(-1.0 - gamma); + } + } + errorflag = 1; + return 0.0; +} + +//----------------------------------------------------------------------------- +// Compute G(gamma and dG(gamma) based on selection flag ibar: +// 0 => G = sqrt(1+gamma) +// 1 => G = exp(gamma/2) +// 2 => not implemented +// 3 => G = 2/(1+exp(-gamma)) +// 4 => G = sqrt(1+gamma) +// -5 => G = +-sqrt(abs(1+gamma)) +// +template +KOKKOS_INLINE_FUNCTION +double MEAMKokkos::dG_gam(const double gamma, const int ibar, double& dG) const +{ + double gsmooth_switchpoint; + double G; + + switch (ibar) { + case 0: + case 4: + gsmooth_switchpoint = -gsmooth_factor / (gsmooth_factor + 1); + if (gamma < gsmooth_switchpoint) { + // e.g. gsmooth_factor is 99, {: + // gsmooth_switchpoint = -0.99 + // G = 0.01*(-0.99/gamma)**99 + G = 1 / (gsmooth_factor + 1) * pow((gsmooth_switchpoint / gamma), gsmooth_factor); + G = sqrt(G); + dG = -gsmooth_factor * G / (2.0 * gamma); + return G; + } else { + G = sqrt(1.0 + gamma); + dG = 1.0 / (2.0 * G); + return G; + } + case 1: + G = MathSpecialKokkos::fm_exp(gamma / 2.0); + dG = G / 2.0; + return G; + case 3: + G = 2.0 / (1.0 + MathSpecialKokkos::fm_exp(-gamma)); + dG = G * (2.0 - G) / 2; + return G; + case -5: + if ((1.0 + gamma) >= 0) { + G = sqrt(1.0 + gamma); + dG = 1.0 / (2.0 * G); + return G; + } else { + G = -sqrt(-1.0 - gamma); + dG = -1.0 / (2.0 * G); + return G; + } + } + dG = 1.0; + return 0.0; +} + +//----------------------------------------------------------------------------- +// Compute ZBL potential +// +template +KOKKOS_INLINE_FUNCTION +double MEAMKokkos::zbl(const double r, const int z1, const int z2) const +{ + int i; + const double c[] = { 0.028171, 0.28022, 0.50986, 0.18175 }; + const double d[] = { 0.20162, 0.40290, 0.94229, 3.1998 }; + const double azero = 0.4685; + const double cc = 14.3997; + double a, x; + // azero = (9pi^2/128)^1/3 (0.529) Angstroms + a = azero / (pow(z1, 0.23) + pow(z2, 0.23)); + double result = 0.0; + x = r / a; + for (i = 0; i <= 3; i++) { + result = result + c[i] * MathSpecialKokkos::fm_exp(-d[i] * x); + } + if (r > 0.0) + result = result * z1 * z2 / r * cc; + return result; +} + +//----------------------------------------------------------------------------- +// Compute Rose energy function, I.16 +// +template +KOKKOS_INLINE_FUNCTION +double MEAMKokkos::erose(const double r, const double re, const double alpha, const double Ec, const double repuls, + const double attrac, const int form) const +{ + double astar, a3; + double result = 0.0; + + if (r > 0.0) { + astar = alpha * (r / re - 1.0); + a3 = 0.0; + if (astar >= 0) + a3 = attrac; + else if (astar < 0) + a3 = repuls; + + if (form == 1) + result = -Ec * (1 + astar + (-attrac + repuls / r) * MathSpecialKokkos::cube(astar)) * MathSpecialKokkos::fm_exp(-astar); + else if (form == 2) + result = -Ec * (1 + astar + a3 * MathSpecialKokkos::cube(astar)) * MathSpecialKokkos::fm_exp(-astar); + else + result = -Ec * (1 + astar + a3 * MathSpecialKokkos::cube(astar) / (r / re)) * MathSpecialKokkos::fm_exp(-astar); + } + return result; +} + +//----------------------------------------------------------------------------- +// Shape factors for various configurations +// +template +KOKKOS_INLINE_FUNCTION +void MEAMKokkos::get_shpfcn(const lattice_t latt, double (&s)[3]) const +{ + switch (latt) { + case FCC: + case BCC: + case B1: + case B2: + s[0] = 0.0; + s[1] = 0.0; + s[2] = 0.0; + break; + case HCP: + s[0] = 0.0; + s[1] = 0.0; + s[2] = 1.0 / 3.0; + break; + case DIA: + s[0] = 0.0; + s[1] = 0.0; + s[2] = 32.0 / 9.0; + break; + case DIM: + s[0] = 1.0; + s[1] = 2.0 / 3.0; + // s(3) = 1.d0 + s[2] = 0.40; + break; + default: + s[0] = 0.0; + // call error('Lattice not defined in get_shpfcn.') + } +} + +//----------------------------------------------------------------------------- +// Number of neighbors for the reference structure +// +template +KOKKOS_INLINE_FUNCTION +int MEAMKokkos::get_Zij(const lattice_t latt) const +{ + switch (latt) { + case FCC: + return 12; + case BCC: + return 8; + case HCP: + return 12; + case B1: + return 6; + case DIA: + return 4; + case DIM: + return 1; + case C11: + return 10; + case L12: + return 12; + case B2: + return 8; + // call error('Lattice not defined in get_Zij.') + } + return 0; +} + +//----------------------------------------------------------------------------- +// Number of second neighbors for the reference structure +// a = distance ratio R1/R2 +// S = second neighbor screening function +// +template +KOKKOS_INLINE_FUNCTION +int MEAMKokkos::get_Zij2(const lattice_t latt, const double cmin, const double cmax, double& a, double& S) const +{ + + double C, x, sijk; + int Zij2 = 0, numscr = 0; + + switch (latt) { + + case FCC: + Zij2 = 6; + a = sqrt(2.0); + numscr = 4; + break; + + case BCC: + Zij2 = 6; + a = 2.0 / sqrt(3.0); + numscr = 4; + break; + + case HCP: + Zij2 = 6; + a = sqrt(2.0); + numscr = 4; + break; + + case B1: + Zij2 = 12; + a = sqrt(2.0); + numscr = 2; + break; + + case DIA: + Zij2 = 12; + a = sqrt(8.0 / 3.0); + numscr = 1; + if (cmin < 0.500001) { + // call error('can not do 2NN MEAM for dia') + } + break; + + case DIM: + // this really shouldn't be allowed; make sure screening is zero + a = 1.0; + S = 0.0; + return 0; + + case L12: + Zij2 = 6; + a = sqrt(2.0); + numscr = 4; + break; + + case B2: + Zij2 = 6; + a = 2.0 / sqrt(3.0); + numscr = 4; + break; + case C11: + // unsupported lattice flag C11 in get_Zij + break; + default: + // unknown lattic flag in get Zij + // call error('Lattice not defined in get_Zij.') + break; + } + + // Compute screening for each first neighbor + C = 4.0 / (a * a) - 1.0; + x = (C - cmin) / (cmax - cmin); + sijk = fcut(x); + // There are numscr first neighbors screening the second neighbors + S = MathSpecialKokkos::powint(sijk, numscr); + return Zij2; +} diff --git a/src/KOKKOS/meam_impl_kokkos.h b/src/KOKKOS/meam_impl_kokkos.h new file mode 100644 index 0000000000..0d2ec99445 --- /dev/null +++ b/src/KOKKOS/meam_impl_kokkos.h @@ -0,0 +1,72 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Sebastian Hütter (OvGU) +------------------------------------------------------------------------- */ + +#include "memory_kokkos.h" +#include "meam_kokkos.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +MEAMKokkos::MEAMKokkos(Memory *mem) : MEAM(mem) +{ +} + +template +MEAMKokkos::~MEAMKokkos() +{ + MemoryKokkos *memoryKK = (MemoryKokkos *)memory; + + memoryKK->destroy_kokkos(k_phirar6,phirar6); + memoryKK->destroy_kokkos(k_phirar5,phirar5); + memoryKK->destroy_kokkos(k_phirar4,phirar4); + memoryKK->destroy_kokkos(k_phirar3,phirar3); + memoryKK->destroy_kokkos(k_phirar2,phirar2); + memoryKK->destroy_kokkos(k_phirar1,phirar1); + memoryKK->destroy_kokkos(k_phirar,phirar); + memoryKK->destroy_kokkos(k_phir,phir); + + memoryKK->destroy_kokkos(k_rho,rho); + memoryKK->destroy_kokkos(k_rho0,rho0); + memoryKK->destroy_kokkos(k_rho1,rho1); + memoryKK->destroy_kokkos(k_rho2,rho2); + memoryKK->destroy_kokkos(k_rho3,rho3); + memoryKK->destroy_kokkos(k_frhop,frhop); + memoryKK->destroy_kokkos(k_gamma,gamma); + memoryKK->destroy_kokkos(k_dgamma1,dgamma1); + memoryKK->destroy_kokkos(k_dgamma2,dgamma2); + memoryKK->destroy_kokkos(k_dgamma3,dgamma3); + memoryKK->destroy_kokkos(k_arho2b,arho2b); + + memoryKK->destroy_kokkos(k_arho1,arho1); + memoryKK->destroy_kokkos(k_arho2,arho2); + memoryKK->destroy_kokkos(k_arho3,arho3); + memoryKK->destroy_kokkos(k_arho3b,arho3b); + memoryKK->destroy_kokkos(k_t_ave,t_ave); + memoryKK->destroy_kokkos(k_tsq_ave,tsq_ave); + + memoryKK->destroy_kokkos(k_scrfcn,scrfcn); + memoryKK->destroy_kokkos(k_dscrfcn,dscrfcn); + memoryKK->destroy_kokkos(k_fcpair,fcpair); +} +#include "meam_setup_done_kokkos.h" +#include "meam_funcs_kokkos.h" +#include "meam_dens_init_kokkos.h" +#include "meam_dens_final_kokkos.h" +#include "meam_force_kokkos.h" +// + diff --git a/src/KOKKOS/meam_kokkos.h b/src/KOKKOS/meam_kokkos.h new file mode 100644 index 0000000000..0231f4b767 --- /dev/null +++ b/src/KOKKOS/meam_kokkos.h @@ -0,0 +1,142 @@ +#ifndef LMP_MEAMKOKKOS_H +#define LMP_MEAMKOKKOS_H + +#include "meam.h" +#include "memory_kokkos.h" +#include "neigh_request.h" +#include "neighbor_kokkos.h" +#include "kokkos.h" +#include +#include + +namespace LAMMPS_NS { + +struct TagMEAMDensFinal{}; +template +struct TagMEAMDensInit{}; +struct TagMEAMInitialize{}; +template +struct TagMEAMforce{}; + +template +class MEAMKokkos : public MEAM +{ +public: + typedef ArrayTypes AT; + typedef EV_FLOAT value_type; + MEAMKokkos(Memory* mem); + ~MEAMKokkos(); + + KOKKOS_INLINE_FUNCTION + void operator()(TagMEAMDensFinal, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagMEAMDensInit, const int&, EV_FLOAT&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagMEAMInitialize, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagMEAMforce, const int&, EV_FLOAT&) const; +protected: +//Parameters to meam_dens_init - is there a better way to do this? + int ntype; + typename AT::t_int_1d_randomread type; + typename AT::t_int_1d_randomread d_offset; + typename AT::t_int_1d_randomread fmap; + typename AT::t_x_array_randomread x; + typename AT::t_int_1d_randomread d_numneigh_half; + typename AT::t_int_1d_randomread d_numneigh_full; + typename AT::t_neighbors_2d d_neighbors_half; + typename AT::t_neighbors_2d d_neighbors_full; + typename AT::t_int_1d_randomread d_ilist_half; + typename AT::t_f_array f; + typename ArrayTypes::t_virial_array d_vatom; +//Parameters to meam_dens_final - is there a better way to do this? + int eflag_either, eflag_global, eflag_atom, vflag_atom; + double *eng_vdwl; + typename ArrayTypes::t_efloat_1d d_eatom; + +public: + void meam_dens_setup(int, int, int); + void meam_setup_done(void); + void meam_dens_init(int , int , typename AT::t_int_1d_randomread , typename AT::t_int_1d_randomread, typename AT::t_x_array_randomread, typename AT::t_int_1d_randomread, + typename AT::t_int_1d_randomread , int* , typename AT::t_int_1d_randomread, typename AT::t_neighbors_2d,typename AT::t_neighbors_2d,typename AT::t_int_1d_randomread, int ); + void meam_dens_final(int , int , int , int , double* , + typename ArrayTypes::t_efloat_1d , int , typename AT::t_int_1d_randomread , typename AT::t_int_1d_randomread , int& ); + void meam_force(int , int , int , int , int , double* , + typename ArrayTypes::t_efloat_1d , int , typename AT::t_int_1d_randomread , typename AT::t_int_1d_randomread , typename AT::t_x_array_randomread , typename AT::t_int_1d_randomread , + typename AT::t_int_1d_randomread , typename AT::t_f_array , typename ArrayTypes::t_virial_array ,typename AT::t_int_1d_randomread , typename AT::t_int_1d_randomread, typename AT::t_neighbors_2d, typename AT::t_neighbors_2d, int); + template + KOKKOS_INLINE_FUNCTION + void getscreen(int , int, typename AT::t_x_array_randomread , typename AT::t_int_1d_randomread, + typename AT::t_int_1d_randomread, int , typename AT::t_int_1d_randomread , typename AT::t_int_1d_randomread ) const; + template + KOKKOS_INLINE_FUNCTION + void calc_rho1(int , int , typename AT::t_int_1d_randomread , typename AT::t_int_1d_randomread , typename AT::t_x_array_randomread , typename AT::t_int_1d_randomread, int ) const; + KOKKOS_INLINE_FUNCTION + double fcut(const double xi) const; + KOKKOS_INLINE_FUNCTION + double dfcut(const double xi, double& dfc) const; + KOKKOS_INLINE_FUNCTION + double dCfunc(const double, const double, const double) const; + KOKKOS_INLINE_FUNCTION + void dCfunc2(const double, const double, const double, double&, double&) const; + KOKKOS_INLINE_FUNCTION + double G_gam(const double, const int, int&) const; + KOKKOS_INLINE_FUNCTION + double dG_gam(const double, const int, double&) const; + KOKKOS_INLINE_FUNCTION + double zbl(const double, const int, const int) const; + KOKKOS_INLINE_FUNCTION + double erose(const double, const double, const double, const double, const double, const double, const int) const; + KOKKOS_INLINE_FUNCTION + void get_shpfcn(const lattice_t latt, double (&s)[3]) const; + KOKKOS_INLINE_FUNCTION + int get_Zij(const lattice_t ) const; + KOKKOS_INLINE_FUNCTION + int get_Zij2(const lattice_t, const double, const double, double&, double&) const; +public: + DAT::tdual_ffloat_1d k_rho, k_rho0, k_rho1, k_rho2, k_rho3, k_frhop; + typename ArrayTypes::t_ffloat_1d d_rho, d_rho0,d_rho1, d_rho2, d_rho3, d_frhop; + HAT::t_ffloat_1d h_rho, h_rho0, h_rho1, h_rho2, h_rho3, h_frhop; + DAT::tdual_ffloat_1d k_gamma, k_dgamma1, k_dgamma2, k_dgamma3, k_arho2b; + typename ArrayTypes::t_ffloat_1d d_gamma, d_dgamma1, d_dgamma2, d_dgamma3, d_arho2b; + HAT::t_ffloat_1d h_gamma, h_dgamma1, h_dgamma2, h_dgamma3, h_arho2b; + DAT::tdual_ffloat_2d k_arho1, k_arho2, k_arho3, k_arho3b, k_t_ave, k_tsq_ave; + typename ArrayTypes::t_ffloat_2d d_arho1, d_arho2, d_arho3, d_arho3b, d_t_ave, d_tsq_ave; + HAT::t_ffloat_2d h_arho1, h_arho2, h_arho3, h_arho3b, h_t_ave, h_tsq_ave; + DAT::tdual_ffloat_2d k_phir, k_phirar, k_phirar1, k_phirar2, k_phirar3, k_phirar4, k_phirar5, k_phirar6; + typename ArrayTypes::t_ffloat_2d d_phir, d_phirar, d_phirar1, d_phirar2, d_phirar3, d_phirar4, d_phirar5, d_phirar6; + HAT::t_ffloat_2d h_phir, h_phirar, h_phirar1, h_phirar2, h_phirar3, h_phirar4, h_phirar5, h_phirar6; + DAT::tdual_ffloat_1d k_scrfcn, k_dscrfcn, k_fcpair; + typename ArrayTypes::t_ffloat_1d d_scrfcn, d_dscrfcn, d_fcpair; + HAT::t_ffloat_1d h_scrfcn, h_dscrfcn, h_fcpair; + + +}; +KOKKOS_INLINE_FUNCTION +static bool iszero_kk(const double f) { + return fabs(f) < 1e-20; +} + + +KOKKOS_INLINE_FUNCTION +static double fdiv_zero_kk(const double n, const double d) { + if (iszero_kk(d)) + return 0.0; + return n / d; +} + +// Functions we need for compat + +} +#include "meam_impl_kokkos.h" +//#include "meam_setup_done_kokkos.h" +//#include "meam_funcs_kokkos.h" +//#include "meam_dens_init_kokkos.h" +//#include "meam_dens_final_kokkos.h" +//#include "meam_force_kokkos.h" +#endif diff --git a/src/KOKKOS/meam_setup_done_kokkos.h b/src/KOKKOS/meam_setup_done_kokkos.h new file mode 100644 index 0000000000..72852bf2cb --- /dev/null +++ b/src/KOKKOS/meam_setup_done_kokkos.h @@ -0,0 +1,71 @@ +#include "meam_kokkos.h" + +template +void MEAMKokkos::meam_setup_done(void) +{ + MemoryKokkos *memoryKK = (MemoryKokkos *)memory; + + memoryKK->destroy_kokkos(k_phirar6,phirar6); + memoryKK->destroy_kokkos(k_phirar5,phirar5); + memoryKK->destroy_kokkos(k_phirar4,phirar4); + memoryKK->destroy_kokkos(k_phirar3,phirar3); + memoryKK->destroy_kokkos(k_phirar2,phirar2); + memoryKK->destroy_kokkos(k_phirar1,phirar1); + memoryKK->destroy_kokkos(k_phirar,phirar); + memoryKK->destroy_kokkos(k_phir,phir); + + memoryKK->create_kokkos(k_phir, phir, (neltypes * (neltypes + 1)) / 2, nr, "pair:phir"); + memoryKK->create_kokkos(k_phirar, phirar, (neltypes * (neltypes + 1)) / 2, nr, "pair:phirar"); + memoryKK->create_kokkos(k_phirar1, phirar1, (neltypes * (neltypes + 1)) / 2, nr, "pair:phirar1"); + memoryKK->create_kokkos(k_phirar2, phirar2, (neltypes * (neltypes + 1)) / 2, nr, "pair:phirar2"); + memoryKK->create_kokkos(k_phirar3, phirar3, (neltypes * (neltypes + 1)) / 2, nr, "pair:phirar3"); + memoryKK->create_kokkos(k_phirar4, phirar4, (neltypes * (neltypes + 1)) / 2, nr, "pair:phirar4"); + memoryKK->create_kokkos(k_phirar5, phirar5, (neltypes * (neltypes + 1)) / 2, nr, "pair:phirar5"); + memoryKK->create_kokkos(k_phirar6, phirar6, (neltypes * (neltypes + 1)) / 2, nr, "pair:phirar6"); + + h_phir = k_phir.h_view; + h_phirar = k_phirar.h_view; + h_phirar1 = k_phirar1.h_view; + h_phirar2 = k_phirar2.h_view; + h_phirar3 = k_phirar3.h_view; + h_phirar4 = k_phirar4.h_view; + h_phirar5 = k_phirar5.h_view; + h_phirar6 = k_phirar6.h_view; + + for (int i = 0; i <(neltypes * (neltypes + 1)) / 2; i++) + for(int j = 0; j < nr; j++) + { + h_phir(i,j) = phir[i][j]; + h_phirar(i,j) = phirar[i][j]; + h_phirar1(i,j) = phirar1[i][j]; + h_phirar2(i,j) = phirar2[i][j]; + h_phirar3(i,j) = phirar3[i][j]; + h_phirar4(i,j) = phirar4[i][j]; + h_phirar5(i,j) = phirar5[i][j]; + h_phirar6(i,j) = phirar6[i][j]; + } + k_phir.template modify(); + k_phir.template sync(); + d_phir = k_phir.template view(); + k_phirar.template modify(); + k_phirar.template sync(); + d_phirar = k_phirar.template view(); + k_phirar1.template modify(); + k_phirar1.template sync(); + d_phirar1 = k_phirar1.template view(); + k_phirar2.template modify(); + k_phirar2.template sync(); + d_phirar2 = k_phirar2.template view(); + k_phirar3.template modify(); + k_phirar3.template sync(); + d_phirar3 = k_phirar3.template view(); + k_phirar4.template modify(); + k_phirar4.template sync(); + d_phirar4 = k_phirar4.template view(); + k_phirar5.template modify(); + k_phirar5.template sync(); + d_phirar5 = k_phirar5.template view(); + k_phirar6.template modify(); + k_phirar6.template sync(); + d_phirar6 = k_phirar6.template view(); +} diff --git a/src/KOKKOS/pair_meam_kokkos.cpp b/src/KOKKOS/pair_meam_kokkos.cpp new file mode 100644 index 0000000000..38386d0a7a --- /dev/null +++ b/src/KOKKOS/pair_meam_kokkos.cpp @@ -0,0 +1,633 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Greg Wagner (SNL) +------------------------------------------------------------------------- */ + +#include +#include +#include +#include +//KK* +#include "meam_kokkos.h" +#include "kokkos.h" +#include "pair_kokkos.h" +//#include "pair_meamc.h" + +#include "pair_meam_kokkos.h" +#include "atom_kokkos.h" +//*KK +#include "force.h" +#include "comm.h" +//KK* +//#include "memory.h" +#include "memory_kokkos.h" +//*KK +#include "neighbor.h" +//KK* +//#include "neigh_list.h" +#include "neigh_list_kokkos.h" +//*KK +#include "neigh_request.h" +#include "error.h" +//*KK +#include "atom_masks.h" +//*KK + +using namespace LAMMPS_NS; + +#if 0 +static const int nkeywords = 21; +static const char *keywords[] = { + "Ec","alpha","rho0","delta","lattce", + "attrac","repuls","nn2","Cmin","Cmax","rc","delr", + "augt1","gsmooth_factor","re","ialloy", + "mixture_ref_t","erose_form","zbl", + "emb_lin_neg","bkgd_dyn"}; +#endif + +/* ---------------------------------------------------------------------- */ +template +PairMEAMKokkos::PairMEAMKokkos(LAMMPS *lmp) : PairMEAM(lmp) +{ + respa_enable = 0; + + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + meam_inst_kk = new MEAMKokkos(memory); + delete meam_inst; + meam_inst = meam_inst_kk; +} +/* ---------------------------------------------------------------------- */ + +template +PairMEAMKokkos::~PairMEAMKokkos() +{ + if (!copymode) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); + delete meam_inst_kk; + } +} + +/* ---------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- */ +template +void PairMEAMKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + ev_init(eflag,vflag,0); + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.view(); + } + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + d_vatom = k_vatom.view(); + } + + atomKK->sync(execution_space,datamask_read); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + + // neighbor list info + + NeighListKokkos* k_halflist = static_cast*>(listhalf); + int inum_half = listhalf->inum; + int* numneigh_half = listhalf->numneigh; + int* ilist_half = listhalf->ilist; + + d_ilist_half = k_halflist->d_ilist; + d_numneigh_half = k_halflist->d_numneigh; + d_neighbors_half = k_halflist->d_neighbors; + NeighListKokkos* k_fulllist = static_cast*>(listfull); + d_numneigh_full = k_fulllist->d_numneigh; + d_neighbors_full = k_fulllist->d_neighbors; + + copymode = 1; + + // strip neighbor lists of any special bond flags before using with MEAM + // necessary before doing neigh_f2c and neigh_c2f conversions each step + if (neighbor->ago == 0) { + Kokkos::parallel_for(Kokkos::RangePolicy(0,inum_half),*this); + } + + // check size of scrfcn based on half neighbor list + + nlocal = atom->nlocal; + nall = nlocal + atom->nghost; + + int n = 0; + //for (ii = 0; ii < inum_half; ii++) n += numneigh_half[ilist_half[ii]]; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,inum_half), *this, n); + + meam_inst_kk->meam_dens_setup(atom->nmax, nall, n); + + //double **x = atom->x; + x = atomKK->k_x.view(); + + //double **f = atom->f; + f = atomKK->k_f.view(); + + //int *type = atom->type; + type = atomKK->k_type.view(); + + int ntype = atom->ntypes; + + // 3 stages of MEAM calculation + // loop over my atoms followed by communication + + int offset = 0; + int errorflag = 0; +#if 0 + for (ii = 0; ii < inum_half; ii++) { + i = ilist_half[ii]; + meam_inst->meam_dens_init(i,ntype,type,map,x, + numneigh_half[i],firstneigh_half[i], + numneigh_full[i],firstneigh_full[i], + offset); + offset += numneigh_half[i]; + } +#endif + // To do: create the cumulative offset array in host and device + + k_offset = DAT::tdual_int_1d("pair:offset",inum_half+1); + h_offset = k_offset.h_view; + d_offset = k_offset.template view(); + ArrayTypes::t_int_1d h_ilist; + ArrayTypes::t_int_1d h_numneigh; + h_ilist = Kokkos::create_mirror_view(k_halflist->d_ilist); + h_numneigh = Kokkos::create_mirror_view(k_halflist->d_numneigh); + Kokkos::deep_copy(h_ilist,k_halflist->d_ilist); + Kokkos::deep_copy(h_numneigh,k_halflist->d_numneigh); + + h_offset[0] = 0; + for (int ii = 0; ii < inum_half; ii++) { + int i = h_ilist[ii]; + h_offset[ii+1] = h_offset[ii] + h_numneigh[i]; + } + k_offset.template modify(); + k_offset.template sync(); + meam_inst_kk->meam_dens_init(inum_half,ntype,type,d_map,x,d_numneigh_half,d_numneigh_full,&offset,d_ilist_half,d_neighbors_half, d_neighbors_full, d_offset, neighflag); + meam_inst_kk->k_rho0.template modify(); + meam_inst_kk->k_rho0.template sync(); + + meam_inst_kk->k_arho2b.template modify(); + meam_inst_kk->k_arho2b.template sync(); + + meam_inst_kk->k_arho1.template modify(); + meam_inst_kk->k_arho1.template sync(); + + meam_inst_kk->k_arho2.template modify(); + meam_inst_kk->k_arho2.template sync(); + + meam_inst_kk->k_arho3.template modify(); + meam_inst_kk->k_arho3.template sync(); + + meam_inst_kk->k_arho3b.template modify(); + meam_inst_kk->k_arho3b.template sync(); + + meam_inst_kk->k_t_ave.template modify(); + meam_inst_kk->k_t_ave.template sync(); + + meam_inst_kk->k_tsq_ave.template modify(); + meam_inst_kk->k_tsq_ave.template sync(); + + comm->reverse_comm(this); + + meam_inst_kk->k_rho0.template modify(); + meam_inst_kk->k_rho0.template sync(); + + meam_inst_kk->k_arho2b.template modify(); + meam_inst_kk->k_arho2b.template sync(); + + meam_inst_kk->k_arho1.template modify(); + meam_inst_kk->k_arho1.template sync(); + + meam_inst_kk->k_arho2.template modify(); + meam_inst_kk->k_arho2.template sync(); + + meam_inst_kk->k_arho3.template modify(); + meam_inst_kk->k_arho3.template sync(); + + meam_inst_kk->k_arho3b.template modify(); + meam_inst_kk->k_arho3b.template sync(); + + meam_inst_kk->k_t_ave.template modify(); + meam_inst_kk->k_t_ave.template sync(); + + meam_inst_kk->k_tsq_ave.template modify(); + meam_inst_kk->k_tsq_ave.template sync(); + + + meam_inst_kk->meam_dens_final(nlocal,eflag_either,eflag_global,eflag_atom, + &eng_vdwl,d_eatom,ntype,type,d_map,errorflag); + if (errorflag) { + char str[128]; + sprintf(str,"MEAM library error %d",errorflag); + error->one(FLERR,str); + } + + comm->forward_comm(this); + + offset = 0; + + // vptr is first value in vatom if it will be used by meam_force() + // else vatom may not exist, so pass dummy ptr + +#if 0 // To do: is this correct? vflag_atom is used to access vatom + typename ArrayTypes::t_virial_array vptr; + if (vflag_atom) vptr = d_vatom; + else vptr = NULL; + for (ii = 0; ii < inum_half; ii++) { + i = ilist_half[ii]; + meam_inst->meam_force(i,eflag_either,eflag_global,eflag_atom, + vflag_atom,&eng_vdwl,eatom,ntype,type,map,x, + numneigh_half[i],firstneigh_half[i], + numneigh_full[i],firstneigh_full[i], + offset,f,vptr); + offset += numneigh_half[i]; + } +#endif + meam_inst_kk->meam_force(inum_half, eflag_either,eflag_global,eflag_atom, + vflag_atom,&eng_vdwl,d_eatom,ntype,type,d_map,x, + d_numneigh_half, d_numneigh_full,f,d_vatom,d_ilist_half, d_offset, d_neighbors_half, d_neighbors_full, neighflag); + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ +template +void PairMEAMKokkos::coeff(int narg, char **arg) +{ + PairMEAM::coeff(narg,arg); + + //sync map + + int n = atom->ntypes; + k_map = DAT::tdual_int_1d("pair:map",n+1); + HAT::t_int_1d h_map = k_map.h_view; + + for (int i = 1; i <= n; i++) + h_map[i] = map[i]; + + k_map.template modify(); + k_map.template sync(); + + d_map = k_map.template view(); + + // To do: need to synchronize phirar variables + + meam_inst_kk->meam_setup_done(); + +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ +template +void PairMEAMKokkos::init_style() +{ + + PairMEAM::init_style(); + + neighflag = lmp->kokkos->neighflag; + auto request = neighbor->find_request(this); + + // MEAM needs both a full and half neighbor list? Not sure how to get that. + if (!(neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD)) + error->all(FLERR, "Cannot use chosen neighbor list style with pair meam/kk"); + + request->set_kokkos_host(std::is_same::value && + !std::is_same::value); + request->set_kokkos_device(std::is_same::value); + if (neighflag == FULL) request->enable_full(); + +} + +/* ---------------------------------------------------------------------- */ +template +int PairMEAMKokkos::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf, + int pbc_flag, int *pbc) +{ + d_sendlist = k_sendlist.view(); + iswap = iswap_in; + v_buf = buf.view(); + Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); + return n; +} + +template +KOKKOS_INLINE_FUNCTION +void PairMEAMKokkos::operator()(TagPairMEAMPackForwardComm, const int &i) const { + int j = d_sendlist(iswap, i); + int m = i*38; + v_buf[m++] = meam_inst_kk->d_rho0[j]; + v_buf[m++] = meam_inst_kk->d_rho1[j]; + v_buf[m++] = meam_inst_kk->d_rho2[j]; + v_buf[m++] = meam_inst_kk->d_rho3[j]; + v_buf[m++] = meam_inst_kk->d_frhop[j]; + v_buf[m++] = meam_inst_kk->d_gamma[j]; + v_buf[m++] = meam_inst_kk->d_dgamma1[j]; + v_buf[m++] = meam_inst_kk->d_dgamma2[j]; + v_buf[m++] = meam_inst_kk->d_dgamma3[j]; + v_buf[m++] = meam_inst_kk->d_arho2b[j]; + v_buf[m++] = meam_inst_kk->d_arho1(j,0); + v_buf[m++] = meam_inst_kk->d_arho1(j,1); + v_buf[m++] = meam_inst_kk->d_arho1(j,2); + v_buf[m++] = meam_inst_kk->d_arho2(j,0); + v_buf[m++] = meam_inst_kk->d_arho2(j,1); + v_buf[m++] = meam_inst_kk->d_arho2(j,2); + v_buf[m++] = meam_inst_kk->d_arho2(j,3); + v_buf[m++] = meam_inst_kk->d_arho2(j,4); + v_buf[m++] = meam_inst_kk->d_arho2(j,5); + for (int k = 0; k < 10; k++) v_buf[m++] = meam_inst_kk->d_arho3(j,k); + v_buf[m++] = meam_inst_kk->d_arho3b(j,0); + v_buf[m++] = meam_inst_kk->d_arho3b(j,1); + v_buf[m++] = meam_inst_kk->d_arho3b(j,2); + v_buf[m++] = meam_inst_kk->d_t_ave(j,0); + v_buf[m++] = meam_inst_kk->d_t_ave(j,1); + v_buf[m++] = meam_inst_kk->d_t_ave(j,2); + v_buf[m++] = meam_inst_kk->d_tsq_ave(j,0); + v_buf[m++] = meam_inst_kk->d_tsq_ave(j,1); + v_buf[m++] = meam_inst_kk->d_tsq_ave(j,2); +} + +/* ---------------------------------------------------------------------- */ +template +void PairMEAMKokkos::unpack_forward_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf) +{ + first = first_in; + v_buf = buf.view(); + Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); +} + +template +KOKKOS_INLINE_FUNCTION +void PairMEAMKokkos::operator()(TagPairMEAMUnpackForwardComm, const int &i) const{ + int m = i*38; + + meam_inst_kk->d_rho0[i+first] = v_buf[m++]; + meam_inst_kk->d_rho1[i+first] = v_buf[m++]; + meam_inst_kk->d_rho2[i+first] = v_buf[m++]; + meam_inst_kk->d_rho3[i+first] = v_buf[m++]; + meam_inst_kk->d_frhop[i+first] = v_buf[m++]; + meam_inst_kk->d_gamma[i+first] = v_buf[m++]; + meam_inst_kk->d_dgamma1[i+first] = v_buf[m++]; + meam_inst_kk->d_dgamma2[i+first] = v_buf[m++]; + meam_inst_kk->d_dgamma3[i+first] = v_buf[m++]; + meam_inst_kk->d_arho2b[i+first] = v_buf[m++]; + meam_inst_kk->d_arho1(i+first,0) = v_buf[m++]; + meam_inst_kk->d_arho1(i+first,1) = v_buf[m++]; + meam_inst_kk->d_arho1(i+first,2) = v_buf[m++]; + meam_inst_kk->d_arho2(i+first,0) = v_buf[m++]; + meam_inst_kk->d_arho2(i+first,1) = v_buf[m++]; + meam_inst_kk->d_arho2(i+first,2) = v_buf[m++]; + meam_inst_kk->d_arho2(i+first,3) = v_buf[m++]; + meam_inst_kk->d_arho2(i+first,4) = v_buf[m++]; + meam_inst_kk->d_arho2(i+first,5) = v_buf[m++]; + for (int k = 0; k < 10; k++) meam_inst_kk->d_arho3(i+first,k) = v_buf[m++]; + meam_inst_kk->d_arho3b(i+first,0) = v_buf[m++]; + meam_inst_kk->d_arho3b(i+first,1) = v_buf[m++]; + meam_inst_kk->d_arho3b(i+first,2) = v_buf[m++]; + meam_inst_kk->d_t_ave(i+first,0) = v_buf[m++]; + meam_inst_kk->d_t_ave(i+first,1) = v_buf[m++]; + meam_inst_kk->d_t_ave(i+first,2) = v_buf[m++]; + meam_inst_kk->d_tsq_ave(i+first,0) = v_buf[m++]; + meam_inst_kk->d_tsq_ave(i+first,1) = v_buf[m++]; + meam_inst_kk->d_tsq_ave(i+first,2) = v_buf[m++]; + } + +template +int PairMEAMKokkos::pack_forward_comm(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,k,m; + + m = 0; + + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = meam_inst_kk->h_rho0[j]; + buf[m++] = meam_inst_kk->h_rho1[j]; + buf[m++] = meam_inst_kk->h_rho2[j]; + buf[m++] = meam_inst_kk->h_rho3[j]; + buf[m++] = meam_inst_kk->h_frhop[j]; + buf[m++] = meam_inst_kk->h_gamma[j]; + buf[m++] = meam_inst_kk->h_dgamma1[j]; + buf[m++] = meam_inst_kk->h_dgamma2[j]; + buf[m++] = meam_inst_kk->h_dgamma3[j]; + buf[m++] = meam_inst_kk->h_arho2b[j]; + buf[m++] = meam_inst_kk->h_arho1(j,0); + buf[m++] = meam_inst_kk->h_arho1(j,1); + buf[m++] = meam_inst_kk->h_arho1(j,2); + buf[m++] = meam_inst_kk->h_arho2(j,0); + buf[m++] = meam_inst_kk->h_arho2(j,1); + buf[m++] = meam_inst_kk->h_arho2(j,2); + buf[m++] = meam_inst_kk->h_arho2(j,3); + buf[m++] = meam_inst_kk->h_arho2(j,4); + buf[m++] = meam_inst_kk->h_arho2(j,5); + for (k = 0; k < 10; k++) buf[m++] = meam_inst_kk->h_arho3(j,k); + buf[m++] = meam_inst_kk->h_arho3b(j,0); + buf[m++] = meam_inst_kk->h_arho3b(j,1); + buf[m++] = meam_inst_kk->h_arho3b(j,2); + buf[m++] = meam_inst_kk->h_t_ave(j,0); + buf[m++] = meam_inst_kk->h_t_ave(j,1); + buf[m++] = meam_inst_kk->h_t_ave(j,2); + buf[m++] = meam_inst_kk->h_tsq_ave(j,0); + buf[m++] = meam_inst_kk->h_tsq_ave(j,1); + buf[m++] = meam_inst_kk->h_tsq_ave(j,2); + } + + return m; +} + +template +void PairMEAMKokkos::unpack_forward_comm(int n, int first, double *buf) +{ + int i,k,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + meam_inst_kk->h_rho0[i] = buf[m++]; + meam_inst_kk->h_rho1[i] = buf[m++]; + meam_inst_kk->h_rho2[i] = buf[m++]; + meam_inst_kk->h_rho3[i] = buf[m++]; + meam_inst_kk->h_frhop[i] = buf[m++]; + meam_inst_kk->h_gamma[i] = buf[m++]; + meam_inst_kk->h_dgamma1[i] = buf[m++]; + meam_inst_kk->h_dgamma2[i] = buf[m++]; + meam_inst_kk->h_dgamma3[i] = buf[m++]; + meam_inst_kk->h_arho2b[i] = buf[m++]; + meam_inst_kk->h_arho1(i,0) = buf[m++]; + meam_inst_kk->h_arho1(i,1) = buf[m++]; + meam_inst_kk->h_arho1(i,2) = buf[m++]; + meam_inst_kk->h_arho2(i,0) = buf[m++]; + meam_inst_kk->h_arho2(i,1) = buf[m++]; + meam_inst_kk->h_arho2(i,2) = buf[m++]; + meam_inst_kk->h_arho2(i,3) = buf[m++]; + meam_inst_kk->h_arho2(i,4) = buf[m++]; + meam_inst_kk->h_arho2(i,5) = buf[m++]; + for (k = 0; k < 10; k++) meam_inst_kk->h_arho3(i,k) = buf[m++]; + meam_inst_kk->h_arho3b(i,0) = buf[m++]; + meam_inst_kk->h_arho3b(i,1) = buf[m++]; + meam_inst_kk->h_arho3b(i,2) = buf[m++]; + meam_inst_kk->h_t_ave(i,0) = buf[m++]; + meam_inst_kk->h_t_ave(i,1) = buf[m++]; + meam_inst_kk->h_t_ave(i,2) = buf[m++]; + meam_inst_kk->h_tsq_ave(i,0) = buf[m++]; + meam_inst_kk->h_tsq_ave(i,1) = buf[m++]; + meam_inst_kk->h_tsq_ave(i,2) = buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ +template +int PairMEAMKokkos::pack_reverse_comm(int n, int first, double *buf) +{ + int i,k,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + buf[m++] = meam_inst_kk->h_rho0[i]; + buf[m++] = meam_inst_kk->h_arho2b[i]; + buf[m++] = meam_inst_kk->h_arho1(i,0); + buf[m++] = meam_inst_kk->h_arho1(i,1); + buf[m++] = meam_inst_kk->h_arho1(i,2); + buf[m++] = meam_inst_kk->h_arho2(i,0); + buf[m++] = meam_inst_kk->h_arho2(i,1); + buf[m++] = meam_inst_kk->h_arho2(i,2); + buf[m++] = meam_inst_kk->h_arho2(i,3); + buf[m++] = meam_inst_kk->h_arho2(i,4); + buf[m++] = meam_inst_kk->h_arho2(i,5); + for (k = 0; k < 10; k++) buf[m++] = meam_inst_kk->h_arho3(i,k); + buf[m++] = meam_inst_kk->h_arho3b(i,0); + buf[m++] = meam_inst_kk->h_arho3b(i,1); + buf[m++] = meam_inst_kk->h_arho3b(i,2); + buf[m++] = meam_inst_kk->h_t_ave(i,0); + buf[m++] = meam_inst_kk->h_t_ave(i,1); + buf[m++] = meam_inst_kk->h_t_ave(i,2); + buf[m++] = meam_inst_kk->h_tsq_ave(i,0); + buf[m++] = meam_inst_kk->h_tsq_ave(i,1); + buf[m++] = meam_inst_kk->h_tsq_ave(i,2); + } + + return m; +} + +/* ---------------------------------------------------------------------- */ +template +void PairMEAMKokkos::unpack_reverse_comm(int n, int *list, double *buf) +{ + int i,j,k,m; + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + meam_inst_kk->h_rho0[j] += buf[m++]; + meam_inst_kk->h_arho2b[j] += buf[m++]; + meam_inst_kk->h_arho1(j,0) += buf[m++]; + meam_inst_kk->h_arho1(j,1) += buf[m++]; + meam_inst_kk->h_arho1(j,2) += buf[m++]; + meam_inst_kk->h_arho2(j,0) += buf[m++]; + meam_inst_kk->h_arho2(j,1) += buf[m++]; + meam_inst_kk->h_arho2(j,2) += buf[m++]; + meam_inst_kk->h_arho2(j,3) += buf[m++]; + meam_inst_kk->h_arho2(j,4) += buf[m++]; + meam_inst_kk->h_arho2(j,5) += buf[m++]; + for (k = 0; k < 10; k++) meam_inst_kk->h_arho3(j,k) += buf[m++]; + meam_inst_kk->h_arho3b(j,0) += buf[m++]; + meam_inst_kk->h_arho3b(j,1) += buf[m++]; + meam_inst_kk->h_arho3b(j,2) += buf[m++]; + meam_inst_kk->h_t_ave(j,0) += buf[m++]; + meam_inst_kk->h_t_ave(j,1) += buf[m++]; + meam_inst_kk->h_t_ave(j,2) += buf[m++]; + meam_inst_kk->h_tsq_ave(j,0) += buf[m++]; + meam_inst_kk->h_tsq_ave(j,1) += buf[m++]; + meam_inst_kk->h_tsq_ave(j,2) += buf[m++]; + } +} + +/* ---------------------------------------------------------------------- + memory usage of local atom-based arrays +------------------------------------------------------------------------- */ +template +double PairMEAMKokkos::memory_usage() +{ + double bytes = 11 * meam_inst_kk->nmax * sizeof(double); + bytes += (3 + 6 + 10 + 3 + 3 + 3) * meam_inst_kk->nmax * sizeof(double); + bytes += 3 * meam_inst_kk->maxneigh * sizeof(double); + return bytes; +} +template +KOKKOS_INLINE_FUNCTION +void PairMEAMKokkos::operator()(TagPairMEAMKernelNeighStrip, const int &ii) const { +/* ---------------------------------------------------------------------- + * strip special bond flags from neighbor list entries + * are not used with MEAM + * need to do here so Fortran lib doesn't see them + * done once per reneighbor so that neigh_f2c and neigh_c2f don't see them + * ------------------------------------------------------------------------- */ + const int i = d_ilist_half[ii]; + const int jnum_half = d_numneigh_half[i]; + const int jnum_full = d_numneigh_full[i]; + for (int jj = 0; jj < jnum_half; jj++) { + d_neighbors_half(i,jj) &= NEIGHMASK; + } + for (int jj = 0; jj < jnum_full; jj++) { + d_neighbors_full(i,jj) &= NEIGHMASK; + } +} + +template +KOKKOS_INLINE_FUNCTION +void PairMEAMKokkos::operator()(TagPairMEAMKernelA, const int ii, int &n) const { + const int i = d_ilist_half[ii]; + n += d_numneigh_half[i]; +} + +namespace LAMMPS_NS { +template class PairMEAMKokkos; +#ifdef KOKKOS_ENABLE_CUDA +template class PairMEAMKokkos; +#endif +} + diff --git a/src/KOKKOS/pair_meam_kokkos.h b/src/KOKKOS/pair_meam_kokkos.h new file mode 100644 index 0000000000..114f4a1665 --- /dev/null +++ b/src/KOKKOS/pair_meam_kokkos.h @@ -0,0 +1,166 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(meam/c/kk,PairMEAMKokkos) +PairStyle(meam/c/kk/device,PairMEAMKokkos) +PairStyle(meam/c/kk/host,PairMEAMKokkos) +PairStyle(meam/kk,PairMEAMKokkos) +PairStyle(meam/kk/device,PairMEAMKokkos) +PairStyle(meam/kk/host,PairMEAMKokkos) + +#else + +#ifndef LMP_PAIR_MEAMC_KOKKOS_H +#define LMP_PAIR_MEAMC_KOKKOS_H + +#include "kokkos_base.h" +#include "pair_kokkos.h" +#include "pair_meam.h" +#include "neigh_list_kokkos.h" +#include "meam_kokkos.h" + + +namespace LAMMPS_NS { +struct TagPairMEAMKernelNeighStrip{}; +struct TagPairMEAMKernelA{}; +struct TagPairMEAMPackForwardComm{}; +struct TagPairMEAMUnpackForwardComm{}; + +template +class MEAMKokkos; + +template +class PairMEAMKokkos : public PairMEAM, public KokkosBase { + public: + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; + enum {COUL_FLAG=0}; + typedef DeviceType device_type; + typedef ArrayTypes AT; + //typedef EV_FLOAT value_type; + + PairMEAMKokkos(class LAMMPS *); + virtual ~PairMEAMKokkos(); + void compute(int, int); + void coeff(int, char **); + void init_style(); + + KOKKOS_INLINE_FUNCTION + void operator()(TagPairMEAMPackForwardComm, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagPairMEAMUnpackForwardComm, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagPairMEAMKernelNeighStrip, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagPairMEAMKernelA, const int, int&) const; + + int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&, + int, int *); + void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&); + int pack_forward_comm(int, int *, double *, int, int *); + void unpack_forward_comm(int, int, double *); + int pack_reverse_comm(int, int, double *); + void unpack_reverse_comm(int, int *, double *); + double memory_usage(); + + protected: + class MEAMKokkos *meam_inst_kk; + typename AT::t_x_array_randomread x; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread type; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + typename ArrayTypes::t_efloat_1d d_eatom; + typename ArrayTypes::t_virial_array d_vatom; + + DAT::tdual_int_1d k_offset; + HAT::t_int_1d h_offset; + typename AT::t_int_1d_randomread d_offset; + + DAT::tdual_int_1d k_map; + typename AT::t_int_1d_randomread d_map; + typename AT::t_int_1d_randomread d_ilist_half; + typename AT::t_int_1d_randomread d_numneigh_half; + typename AT::t_neighbors_2d d_neighbors_half; + typename AT::t_int_1d_randomread d_numneigh_full; + typename AT::t_neighbors_2d d_neighbors_full; + typename AT::t_int_2d d_sendlist; + typename AT::t_xfloat_1d_um v_buf; + + int neighflag,nlocal,nall,eflag,vflag; + int iswap; + int first; + + friend void pair_virial_fdotr_compute(PairMEAMKokkos*); + +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: MEAM library error %d + +A call to the MEAM Fortran library returned an error. + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Incorrect args for pair coefficients + +Self-explanatory. Check the input script or data file. + +E: Pair style MEAM requires newton pair on + +See the newton command. This is a restriction to use the MEAM +potential. + +E: Cannot open MEAM potential file %s + +The specified MEAM potential file cannot be opened. Check that the +path and name are correct. + +E: Incorrect format in MEAM potential file + +Incorrect number of words per line in the potential file. + +E: Unrecognized lattice type in MEAM file 1 + +The lattice type in an entry of the MEAM library file is not +valid. + +E: Did not find all elements in MEAM library file + +The requested elements were not found in the MEAM file. + +E: Keyword %s in MEAM parameter file not recognized + +Self-explanatory. + +E: Unrecognized lattice type in MEAM file 2 + +The lattice type in an entry of the MEAM parameter file is not +valid. + +*/ diff --git a/src/MEAM/meam.h b/src/MEAM/meam.h index 237ffed8aa..8b94ac0084 100644 --- a/src/MEAM/meam.h +++ b/src/MEAM/meam.h @@ -29,7 +29,7 @@ class MEAM { MEAM(Memory *mem); ~MEAM(); - private: + protected: Memory *memory; // cutforce = force cutoff diff --git a/src/MEAM/pair_meam.cpp b/src/MEAM/pair_meam.cpp index 7dcb16d3f6..ab027dcb89 100644 --- a/src/MEAM/pair_meam.cpp +++ b/src/MEAM/pair_meam.cpp @@ -73,6 +73,8 @@ PairMEAM::PairMEAM(LAMMPS *lmp) : Pair(lmp) PairMEAM::~PairMEAM() { + if (copymode) return; + delete meam_inst; if (allocated) { diff --git a/src/MEAM/pair_meam.h b/src/MEAM/pair_meam.h index eea3893309..ffe8045ac9 100644 --- a/src/MEAM/pair_meam.h +++ b/src/MEAM/pair_meam.h @@ -43,7 +43,7 @@ class PairMEAM : public Pair { void unpack_reverse_comm(int, int *, double *) override; double memory_usage() override; - private: + protected: class MEAM *meam_inst; double cutmax; // max cutoff for all elements int nlibelements; // # of library elements @@ -52,7 +52,7 @@ class PairMEAM : public Pair { double **scale; // scaling factor for adapt - void allocate(); + virtual void allocate(); void read_files(const std::string &, const std::string &); void read_global_meam_file(const std::string &); void read_user_meam_file(const std::string &);