Finish porting pair MEAM to Kokkos

This commit is contained in:
Stan Moore
2022-07-01 11:20:51 -06:00
parent 607ed68acd
commit ae7215037d
26 changed files with 1806 additions and 1323 deletions

View File

@ -194,7 +194,7 @@ OPT.
* :doc:`lubricateU/poly <pair_lubricateU>`
* :doc:`mdpd <pair_mesodpd>`
* :doc:`mdpd/rhosum <pair_mesodpd>`
* :doc:`meam <pair_meam>`
* :doc:`meam (k) <pair_meam>`
* :doc:`meam/spline (o) <pair_meam_spline>`
* :doc:`meam/sw/spline <pair_meam_sw_spline>`
* :doc:`mesocnt <pair_mesocnt>`

View File

@ -71,7 +71,7 @@ Syntax
*no_affinity* values = none
*kokkos* args = keyword value ...
zero or more keyword/value pairs may be appended
keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* *comm/pair/forward* *comm/fix/forward* or *comm/reverse* or *gpu/aware* or *pair/only*
keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* *comm/pair/forward* *comm/fix/forward* or *comm/reverse* or *comm/pair/reverse* or *gpu/aware* or *pair/only*
*neigh* value = *full* or *half*
full = full neighbor list
half = half neighbor list built in thread-safe manner
@ -96,6 +96,7 @@ Syntax
*comm/pair/forward* value = *no* or *device*
*comm/fix/forward* value = *no* or *device*
*comm/reverse* value = *no* or *host* or *device*
*comm/pair/reverse* value = *no* or *device*
no = perform communication pack/unpack in non-KOKKOS mode
host = perform pack/unpack on host (e.g. with OpenMP threading)
device = perform pack/unpack on device (e.g. on GPU)
@ -500,7 +501,7 @@ rule of thumb may give too large a binsize and the default should be
overridden with a smaller value.
The *comm* and *comm/exchange* and *comm/forward* and *comm/pair/forward*
and *comm/fix/forward* and comm/reverse*
and *comm/fix/forward* and *comm/reverse* and *comm/pair/reverse*
keywords determine whether the host or device performs the packing and
unpacking of data when communicating per-atom data between processors.
"Exchange" communication happens only on timesteps that neighbor lists
@ -521,9 +522,16 @@ packing/unpacking data for the communication. A value of *host* means to
use the host, typically a multi-core CPU, and perform the
packing/unpacking in parallel with threads. A value of *device* means to
use the device, typically a GPU, to perform the packing/unpacking
operation. If a value of *host* is used for the *comm/pair/forward* or
*comm/fix/forward* keyword, it will be automatically be changed to *no*
since these keywords don't support *host* mode.
operation.
For the *comm/pair/forward* or *comm/fix/forward* or *comm/pair/reverse*
keywords, if a value of *host* is used it will be automatically
be changed to *no* since these keywords don't support *host* mode. The
value of *no* will also always be used when running on the CPU, i.e. setting
the value to *device* will have no effect if the pair/fix style is
running on the CPU. For the *comm/fix/forward* or *comm/pair/reverse*
keywords, not all styles support *device* mode and in that case will run
in *no* mode instead.
The optimal choice for these keywords depends on the input script and
the hardware used. The *no* value is useful for verifying that the

View File

@ -1,8 +1,11 @@
.. index:: pair_style meam
.. index:: pair_style meam/kk
pair_style meam command
=========================
Accelerator Variants: *meam/kk*
Syntax
""""""
@ -347,6 +350,12 @@ Most published MEAM parameter sets use the default values *attrac* = *repulse* =
Setting *repuls* = *attrac* = *delta* corresponds to the form used in several
recent published MEAM parameter sets, such as :ref:`(Valone) <Valone>`
----------
.. include:: accel_styles.rst
----------
.. note::
The default form of the *erose* expression in LAMMPS was corrected

View File

@ -182,6 +182,13 @@ action kokkos_base.h
action kokkos_base_fft.h fft3d.h
action kokkos_few.h
action kokkos_type.h
action meam_kokkos.h meam.h
action meam_dens_final_kokkos.h meam_dens_final.cpp
action meam_dens_init_kokkos.h meam_dens_init.cpp
action meam_force_kokkos.h meam_force.cpp
action meam_funcs_kokkos.h meam_funcs.cpp
action meam_impl_kokkos.h meam_impl.cpp
action meam_setup_done_kokkos.h meam_setup_done.cpp
action memory_kokkos.h
action modify_kokkos.cpp
action modify_kokkos.h
@ -207,13 +214,6 @@ action nbin_ssa_kokkos.cpp nbin_ssa.cpp
action nbin_ssa_kokkos.h nbin_ssa.h
action math_special_kokkos.cpp
action math_special_kokkos.h
action meam_setup_done_kokkos.h meam_setup_done.cpp
action meam_kokkos.h meam.h
action meam_impl_kokkos.h meam_impl.cpp
action meam_funcs_kokkos.h meam_funcs.cpp
action meam_force_kokkos.h meam_force.cpp
action meam_dens_init_kokkos.h meam_dens_init.cpp
action meam_dens_final_kokkos.h meam_dens_final.cpp
action min_cg_kokkos.cpp
action min_cg_kokkos.h
action min_kokkos.cpp
@ -294,8 +294,8 @@ action pair_lj_gromacs_kokkos.cpp pair_lj_gromacs.cpp
action pair_lj_gromacs_kokkos.h pair_lj_gromacs.h
action pair_lj_sdk_kokkos.cpp pair_lj_sdk.cpp
action pair_lj_sdk_kokkos.h pair_lj_sdk.h
action pair_meam_kokkos.h pair_meam.h
action pair_meam_kokkos.cpp pair_meam.cpp
action pair_meam_kokkos.h pair_meam.h
action pair_morse_kokkos.cpp
action pair_morse_kokkos.h
action pair_multi_lucy_rx_kokkos.cpp pair_multi_lucy_rx.cpp

View File

@ -109,6 +109,7 @@ void CommKokkos::init()
exchange_comm_classic = lmp->kokkos->exchange_comm_classic;
forward_comm_classic = lmp->kokkos->forward_comm_classic;
forward_pair_comm_classic = lmp->kokkos->forward_pair_comm_classic;
reverse_pair_comm_classic = lmp->kokkos->reverse_pair_comm_classic;
forward_fix_comm_classic = lmp->kokkos->forward_fix_comm_classic;
reverse_comm_classic = lmp->kokkos->reverse_comm_classic;
exchange_comm_on_host = lmp->kokkos->exchange_comm_on_host;
@ -478,12 +479,13 @@ void CommKokkos::forward_comm_device(Pair *pair)
int nsize = pair->comm_forward;
KokkosBase* pairKKBase = dynamic_cast<KokkosBase*>(pair);
int nmax = max_buf_pair;
for (iswap = 0; iswap < nswap; iswap++) {
int n = MAX(max_buf_pair,nsize*sendnum[iswap]);
n = MAX(n,nsize*recvnum[iswap]);
if (n > max_buf_pair)
grow_buf_pair(n);
nmax = MAX(nmax,nsize*sendnum[iswap]);
nmax = MAX(nmax,nsize*recvnum[iswap]);
}
if (nmax > max_buf_pair)
grow_buf_pair(nmax);
for (iswap = 0; iswap < nswap; iswap++) {
@ -545,8 +547,76 @@ void CommKokkos::grow_buf_fix(int n) {
void CommKokkos::reverse_comm(Pair *pair)
{
if (pair->execution_space == Host || !pair->reverse_comm_device || reverse_pair_comm_classic) {
k_sendlist.sync<LMPHostType>();
CommBrick::reverse_comm(pair);
} else {
k_sendlist.sync<LMPDeviceType>();
reverse_comm_device<LMPDeviceType>(pair);
}
}
template<class DeviceType>
void CommKokkos::reverse_comm_device(Pair *pair)
{
int iswap,n;
MPI_Request request;
DAT::tdual_xfloat_1d k_buf_tmp;
KokkosBase* pairKKBase = dynamic_cast<KokkosBase*>(pair);
int nsize = MAX(pair->comm_reverse,pair->comm_reverse_off);
int nmax = max_buf_pair;
for (iswap = 0; iswap < nswap; iswap++) {
nmax = MAX(nmax,nsize*sendnum[iswap]);
nmax = MAX(nmax,nsize*recvnum[iswap]);
}
if (nmax > max_buf_pair)
grow_buf_pair(nmax);
for (iswap = nswap-1; iswap >= 0; iswap--) {
// pack buffer
n = pairKKBase->pack_reverse_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send_pair);
DeviceType().fence();
// exchange with another proc
// if self, set recv buffer to send buffer
double* buf_send_pair;
double* buf_recv_pair;
if (lmp->kokkos->gpu_aware_flag) {
buf_send_pair = k_buf_send_pair.view<DeviceType>().data();
buf_recv_pair = k_buf_recv_pair.view<DeviceType>().data();
} else {
k_buf_send_pair.modify<DeviceType>();
k_buf_send_pair.sync<LMPHostType>();
buf_send_pair = k_buf_send_pair.h_view.data();
buf_recv_pair = k_buf_recv_pair.h_view.data();
}
if (sendproc[iswap] != me) {
if (sendnum[iswap])
MPI_Irecv(buf_recv_pair,nsize*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world,&request);
if (recvnum[iswap])
MPI_Send(buf_send_pair,n,MPI_DOUBLE,recvproc[iswap],0,world);
if (sendnum[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
if (!lmp->kokkos->gpu_aware_flag) {
k_buf_recv_pair.modify<LMPHostType>();
k_buf_recv_pair.sync<DeviceType>();
}
k_buf_tmp = k_buf_recv_pair;
} else k_buf_tmp = k_buf_send_pair;
// unpack buffer
pairKKBase->unpack_reverse_comm_kokkos(sendnum[iswap],k_sendlist,
iswap,k_buf_tmp);
DeviceType().fence();
}
}
void CommKokkos::forward_comm(Dump *dump)

View File

@ -27,6 +27,7 @@ class CommKokkos : public CommBrick {
bool exchange_comm_classic;
bool forward_comm_classic;
bool forward_pair_comm_classic;
bool reverse_pair_comm_classic;
bool forward_fix_comm_classic;
bool reverse_comm_classic;
bool exchange_comm_on_host;
@ -58,6 +59,7 @@ class CommKokkos : public CommBrick {
template<class DeviceType> void forward_comm_device(int dummy);
template<class DeviceType> void reverse_comm_device();
template<class DeviceType> void forward_comm_device(Pair *pair);
template<class DeviceType> void reverse_comm_device(Pair *pair);
template<class DeviceType> void forward_comm_device(Fix *fix, int size=0);
template<class DeviceType> void exchange_device();
template<class DeviceType> void borders_device();

View File

@ -91,6 +91,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
exchange_comm_changed = 0;
forward_comm_changed = 0;
forward_pair_comm_changed = 0;
reverse_pair_comm_changed = 0;
forward_fix_comm_changed = 0;
reverse_comm_changed = 0;
@ -239,7 +240,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
newtonflag = 0;
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
forward_pair_comm_classic = forward_fix_comm_classic = 0;
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 0;
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
} else {
@ -253,7 +254,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
newtonflag = 1;
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1;
forward_pair_comm_classic = forward_fix_comm_classic = 1;
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 1;
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
}
@ -394,17 +395,17 @@ void KokkosLMP::accelerator(int narg, char **arg)
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
if (strcmp(arg[iarg+1],"no") == 0) {
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1;
forward_pair_comm_classic = forward_fix_comm_classic = 1;
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 1;
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
} else if (strcmp(arg[iarg+1],"host") == 0) {
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
forward_pair_comm_classic = forward_fix_comm_classic = 1;
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 1;
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 1;
} else if (strcmp(arg[iarg+1],"device") == 0) {
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
forward_pair_comm_classic = forward_fix_comm_classic = 0;
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 0;
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
} else error->all(FLERR,"Illegal package kokkos command");
@ -441,6 +442,14 @@ void KokkosLMP::accelerator(int narg, char **arg)
else error->all(FLERR,"Illegal package kokkos command");
forward_pair_comm_changed = 0;
iarg += 2;
} else if (strcmp(arg[iarg],"comm/pair/reverse") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
if (strcmp(arg[iarg+1],"no") == 0) reverse_pair_comm_classic = 1;
else if (strcmp(arg[iarg+1],"host") == 0) reverse_pair_comm_classic = 1;
else if (strcmp(arg[iarg+1],"device") == 0) reverse_pair_comm_classic = 0;
else error->all(FLERR,"Illegal package kokkos command");
reverse_pair_comm_changed = 0;
iarg += 2;
} else if (strcmp(arg[iarg],"comm/fix/forward") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
if (strcmp(arg[iarg+1],"no") == 0) forward_fix_comm_classic = 1;
@ -515,6 +524,10 @@ void KokkosLMP::accelerator(int narg, char **arg)
forward_pair_comm_classic = 1;
forward_pair_comm_changed = 1;
}
if (reverse_pair_comm_classic == 0) {
reverse_pair_comm_classic = 1;
reverse_pair_comm_changed = 1;
}
if (forward_fix_comm_classic == 0) {
forward_fix_comm_classic = 1;
forward_fix_comm_changed = 1;
@ -540,6 +553,10 @@ void KokkosLMP::accelerator(int narg, char **arg)
forward_pair_comm_classic = 0;
forward_pair_comm_changed = 0;
}
if (reverse_pair_comm_changed) {
reverse_pair_comm_classic = 0;
reverse_pair_comm_changed = 0;
}
if (forward_fix_comm_changed) {
forward_fix_comm_classic = 0;
forward_fix_comm_changed = 0;

View File

@ -30,6 +30,7 @@ class KokkosLMP : protected Pointers {
int exchange_comm_classic;
int forward_comm_classic;
int forward_pair_comm_classic;
int reverse_pair_comm_classic;
int forward_fix_comm_classic;
int reverse_comm_classic;
int exchange_comm_on_host;
@ -38,6 +39,7 @@ class KokkosLMP : protected Pointers {
int exchange_comm_changed;
int forward_comm_changed;
int forward_pair_comm_changed;
int reverse_pair_comm_changed;
int forward_fix_comm_changed;
int reverse_comm_changed;
int nthreads,ngpus;

View File

@ -29,6 +29,10 @@ class KokkosBase {
int, int *) {return 0;};
virtual void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d &) {}
virtual int pack_reverse_comm_kokkos(int, int, DAT::tdual_xfloat_1d &) {return 0;};
virtual void unpack_reverse_comm_kokkos(int, DAT::tdual_int_2d,
int, DAT::tdual_xfloat_1d &) {}
// Fix
virtual int pack_forward_comm_fix_kokkos(int, DAT::tdual_int_2d,
int, DAT::tdual_xfloat_1d &,

View File

@ -1,11 +1,202 @@
// clang-format off
#include "math_special_kokkos.h"
#include <cmath>
#include <cstdint>
#include <cstdint> // IWYU pragma: keep
#include <limits>
using namespace LAMMPS_NS;
static constexpr int nmaxfactorial = 167;
/* ----------------------------------------------------------------------
factorial n table, size nmaxfactorial+1
------------------------------------------------------------------------- */
static const double nfac_table[] = {
1,
1,
2,
6,
24,
120,
720,
5040,
40320,
362880,
3628800,
39916800,
479001600,
6227020800,
87178291200,
1307674368000,
20922789888000,
355687428096000,
6.402373705728e+15,
1.21645100408832e+17,
2.43290200817664e+18,
5.10909421717094e+19,
1.12400072777761e+21,
2.5852016738885e+22,
6.20448401733239e+23,
1.5511210043331e+25,
4.03291461126606e+26,
1.08888694504184e+28,
3.04888344611714e+29,
8.8417619937397e+30,
2.65252859812191e+32,
8.22283865417792e+33,
2.63130836933694e+35,
8.68331761881189e+36,
2.95232799039604e+38,
1.03331479663861e+40,
3.71993326789901e+41,
1.37637530912263e+43,
5.23022617466601e+44,
2.03978820811974e+46,
8.15915283247898e+47,
3.34525266131638e+49,
1.40500611775288e+51,
6.04152630633738e+52,
2.65827157478845e+54,
1.1962222086548e+56,
5.50262215981209e+57,
2.58623241511168e+59,
1.24139155925361e+61,
6.08281864034268e+62,
3.04140932017134e+64,
1.55111875328738e+66,
8.06581751709439e+67,
4.27488328406003e+69,
2.30843697339241e+71,
1.26964033536583e+73,
7.10998587804863e+74,
4.05269195048772e+76,
2.35056133128288e+78,
1.3868311854569e+80,
8.32098711274139e+81,
5.07580213877225e+83,
3.14699732603879e+85,
1.98260831540444e+87,
1.26886932185884e+89,
8.24765059208247e+90,
5.44344939077443e+92,
3.64711109181887e+94,
2.48003554243683e+96,
1.71122452428141e+98,
1.19785716699699e+100,
8.50478588567862e+101,
6.12344583768861e+103,
4.47011546151268e+105,
3.30788544151939e+107,
2.48091408113954e+109,
1.88549470166605e+111,
1.45183092028286e+113,
1.13242811782063e+115,
8.94618213078297e+116,
7.15694570462638e+118,
5.79712602074737e+120,
4.75364333701284e+122,
3.94552396972066e+124,
3.31424013456535e+126,
2.81710411438055e+128,
2.42270953836727e+130,
2.10775729837953e+132,
1.85482642257398e+134,
1.65079551609085e+136,
1.48571596448176e+138,
1.3520015276784e+140,
1.24384140546413e+142,
1.15677250708164e+144,
1.08736615665674e+146,
1.03299784882391e+148,
9.91677934870949e+149,
9.61927596824821e+151,
9.42689044888324e+153,
9.33262154439441e+155,
9.33262154439441e+157,
9.42594775983835e+159,
9.61446671503512e+161,
9.90290071648618e+163,
1.02990167451456e+166,
1.08139675824029e+168,
1.14628056373471e+170,
1.22652020319614e+172,
1.32464181945183e+174,
1.44385958320249e+176,
1.58824554152274e+178,
1.76295255109024e+180,
1.97450685722107e+182,
2.23119274865981e+184,
2.54355973347219e+186,
2.92509369349301e+188,
3.3931086844519e+190,
3.96993716080872e+192,
4.68452584975429e+194,
5.5745857612076e+196,
6.68950291344912e+198,
8.09429852527344e+200,
9.8750442008336e+202,
1.21463043670253e+205,
1.50614174151114e+207,
1.88267717688893e+209,
2.37217324288005e+211,
3.01266001845766e+213,
3.8562048236258e+215,
4.97450422247729e+217,
6.46685548922047e+219,
8.47158069087882e+221,
1.118248651196e+224,
1.48727070609069e+226,
1.99294274616152e+228,
2.69047270731805e+230,
3.65904288195255e+232,
5.01288874827499e+234,
6.91778647261949e+236,
9.61572319694109e+238,
1.34620124757175e+241,
1.89814375907617e+243,
2.69536413788816e+245,
3.85437071718007e+247,
5.5502938327393e+249,
8.04792605747199e+251,
1.17499720439091e+254,
1.72724589045464e+256,
2.55632391787286e+258,
3.80892263763057e+260,
5.71338395644585e+262,
8.62720977423323e+264,
1.31133588568345e+267,
2.00634390509568e+269,
3.08976961384735e+271,
4.78914290146339e+273,
7.47106292628289e+275,
1.17295687942641e+278,
1.85327186949373e+280,
2.94670227249504e+282,
4.71472363599206e+284,
7.59070505394721e+286,
1.22969421873945e+289,
2.0044015765453e+291,
3.28721858553429e+293,
5.42391066613159e+295,
9.00369170577843e+297,
1.503616514865e+300, // nmaxfactorial = 167
};
/* ----------------------------------------------------------------------
factorial n vial lookup from precomputed table
------------------------------------------------------------------------- */
double MathSpecialKokkos::factorial(const int n)
{
if (n < 0 || n > nmaxfactorial)
return std::numeric_limits<double>::quiet_NaN();
return nfac_table[n];
}
/* Library libcerf:
* Compute complex error functions, based on a new implementation of
* Faddeeva's w_of_z. Also provide Dawson and Voigt functions.
@ -477,59 +668,3 @@ double MathSpecialKokkos::erfcx_y100(const double y100)
return 1.0;
} /* erfcx_y100 */
/* optimizer friendly implementation of exp2(x).
*
* strategy:
*
* split argument into an integer part and a fraction:
* ipart = floor(x+0.5);
* fpart = x - ipart;
*
* compute exp2(ipart) from setting the ieee754 exponent
* compute exp2(fpart) using a pade' approximation for x in [-0.5;0.5[
*
* the result becomes: exp2(x) = exp2(ipart) * exp2(fpart)
*/
/* IEEE 754 double precision floating point data manipulation */
typedef union
{
double f;
uint64_t u;
struct {int32_t i0,i1;} s;
} udi_t;
static const double fm_exp2_q[] = {
/* 1.00000000000000000000e0, */
2.33184211722314911771e2,
4.36821166879210612817e3
};
static const double fm_exp2_p[] = {
2.30933477057345225087e-2,
2.02020656693165307700e1,
1.51390680115615096133e3
};
double MathSpecialKokkos::exp2_x86(double x)
{
double ipart, fpart, px, qx;
udi_t epart;
ipart = floor(x+0.5);
fpart = x - ipart;
epart.s.i0 = 0;
epart.s.i1 = (((int) ipart) + 1023) << 20;
x = fpart*fpart;
px = fm_exp2_p[0];
px = px*x + fm_exp2_p[1];
qx = x + fm_exp2_q[0];
px = px*x + fm_exp2_p[2];
qx = qx*x + fm_exp2_q[1];
px = px * fpart;
x = 1.0 + 2.0*(px/(qx-px));
return epart.f*x;
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -15,29 +14,159 @@
#ifndef LMP_MATH_SPECIAL_KOKKOS_H
#define LMP_MATH_SPECIAL_KOKKOS_H
#include <cmath>
#include "kokkos_type.h"
#include <cmath>
namespace LAMMPS_NS {
namespace MathSpecialKokkos {
/*! Fast tabulated factorial function
*
* This function looks up pre-computed factorial values for arguments of n = 0
* to a maximum of 167, which is the maximal value representable by a double
* precision floating point number. For other values of n a NaN value is returned.
*
* \param n argument (valid: 0 <= n <= 167)
* \return value of n! as double precision number or NaN */
extern double factorial(const int n);
/* optimizer friendly implementation of exp2(x).
*
* strategy:
*
* split argument into an integer part and a fraction:
* ipart = floor(x+0.5);
* fpart = x - ipart;
*
* compute exp2(ipart) from setting the ieee754 exponent
* compute exp2(fpart) using a pade' approximation for x in [-0.5;0.5[
*
* the result becomes: exp2(x) = exp2(ipart) * exp2(fpart)
*/
/* IEEE 754 double precision floating point data manipulation */
typedef union
{
double f;
uint64_t u;
struct {int32_t i0,i1;} s;
} udi_t;
/* double precision constants */
#define FM_DOUBLE_LOG2OFE 1.4426950408889634074
/*! Fast implementation of 2^x without argument checks for little endian CPUs
*
* This function implements an optimized version of pow(2.0, x) that does not
* check for valid arguments and thus may only be used where arguments are well
* behaved. The implementation makes assumptions about the layout of double
* precision floating point numbers in memory and thus will only work on little
* endian CPUs. If little endian cannot be safely detected, the result of
* calling pow(2.0, x) will be returned. This function also is the basis for
* the fast exponential fm_exp(x).
*
* \param x argument
* \return value of 2^x as double precision number */
KOKKOS_INLINE_FUNCTION
static double exp2_x86(double x)
{
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
double ipart, fpart, px, qx;
udi_t epart;
const double fm_exp2_q[2] = {
/* 1.00000000000000000000e0, */
2.33184211722314911771e2,
4.36821166879210612817e3
};
const double fm_exp2_p[3] = {
2.30933477057345225087e-2,
2.02020656693165307700e1,
1.51390680115615096133e3
};
ipart = floor(x+0.5);
fpart = x - ipart;
epart.s.i0 = 0;
epart.s.i1 = (((int) ipart) + 1023) << 20;
x = fpart*fpart;
px = fm_exp2_p[0];
px = px*x + fm_exp2_p[1];
qx = x + fm_exp2_q[0];
px = px*x + fm_exp2_p[2];
qx = qx*x + fm_exp2_q[1];
px = px * fpart;
x = 1.0 + 2.0*(px/(qx-px));
return epart.f*x;
#else
return pow(2.0, x);
#endif
}
/*! Fast implementation of exp(x) for little endian CPUs
*
* This function implements an optimized version of exp(x) for little endian CPUs.
* It calls the exp2_x86(x) function with a suitable prefactor to x to return exp(x).
* The implementation makes assumptions about the layout of double
* precision floating point numbers in memory and thus will only work on little
* endian CPUs. If little endian cannot be safely detected, the result of
* calling the exp(x) implementation in the standard math library will be returned.
*
* \param x argument
* \return value of e^x as double precision number */
KOKKOS_INLINE_FUNCTION
static double fm_exp(double x)
{
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
if (x < -1022.0/FM_DOUBLE_LOG2OFE) return 0;
if (x > 1023.0/FM_DOUBLE_LOG2OFE) return INFINITY;
return exp2_x86(FM_DOUBLE_LOG2OFE * x);
#else
return ::exp(x);
#endif
}
// support function for scaled error function complement
extern double erfcx_y100(const double y100);
// fast 2**x function without argument checks for little endian CPUs
extern double exp2_x86(double x);
// scaled error function complement exp(x*x)*erfc(x) for coul/long styles
/*! Fast scaled error function complement exp(x*x)*erfc(x) for coul/long styles
*
* This is a portable fast implementation of exp(x*x)*erfc(x) that can be used
* in coul/long pair styles as a replacement for the polynomial expansion that
* is/was widely used. Unlike the polynomial expansion, that is only accurate
* at the level of single precision floating point it provides full double precision
* accuracy, but at comparable speed (unlike the erfc() implementation shipped
* with GNU standard math library).
*
* \param x argument
* \return value of e^(x*x)*erfc(x) */
static inline double my_erfcx(const double x)
{
if (x >= 0.0) return erfcx_y100(400.0/(4.0+x));
else return 2.0*exp(x*x) - erfcx_y100(400.0/(4.0-x));
if (x >= 0.0)
return erfcx_y100(400.0 / (4.0 + x));
else
return 2.0 * exp(x * x) - erfcx_y100(400.0 / (4.0 - x));
}
// exp(-x*x) for coul/long styles
/*! Fast implementation of exp(-x*x) for little endian CPUs for coul/long styles
*
* This function implements an optimized version of exp(-x*x) based on exp2_x86()
* for use with little endian CPUs. If little endian cannot be safely detected,
* the result of calling the exp(-x*x) implementation in the standard math
* library will be returned.
*
* \param x argument
* \return value of e^(-x*x) as double precision number */
static inline double expmsq(double x)
{
@ -50,63 +179,75 @@ namespace MathSpecialKokkos {
#endif
}
#define FM_DOUBLE_LOG2OFE 1.4426950408889634074
// fast e**x function for little endian CPUs, falls back to libc on other platforms
KOKKOS_INLINE_FUNCTION
static double fm_exp(double x)
{
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
return exp2_x86(FM_DOUBLE_LOG2OFE * x);
#else
return ::exp(x);
#endif
}
/*! Fast inline version of pow(x, 2.0)
*
* \param x argument
* \return x*x */
// x**2, use instead of pow(x,2.0)
KOKKOS_INLINE_FUNCTION
static double square(const double &x) { return x*x; }
static double square(const double &x) { return x * x; }
/*! Fast inline version of pow(x, 3.0)
*
* \param x argument
* \return x*x */
// x**3, use instead of pow(x,3.0)
KOKKOS_INLINE_FUNCTION
static double cube(const double &x) { return x*x*x; }
static double cube(const double &x) { return x * x * x; }
/* Fast inline version of pow(-1.0, n)
*
* \param n argument (integer)
* \return -1 if n is odd, 1.0 if n is even */
// return -1.0 for odd n, 1.0 for even n, like pow(-1.0,n)
KOKKOS_INLINE_FUNCTION
static double powsign(const int n) { return (n & 1) ? -1.0 : 1.0; }
// optimized version of pow(x,n) with n being integer
// up to 10x faster than pow(x,y)
/* Fast inline version of pow(x,n) for integer n
*
* This is a version of pow(x,n) optimized for n being integer.
* Speedups of up to 10x faster than pow(x,y) have been measured.
*
* \param n argument (integer)
* \return value of x^n */
KOKKOS_INLINE_FUNCTION
static double powint(const double &x, const int n) {
double yy,ww;
static double powint(const double &x, const int n)
{
double yy, ww;
if (x == 0.0) return 0.0;
int nn = (n > 0) ? n : -n;
ww = x;
for (yy = 1.0; nn != 0; nn >>= 1, ww *=ww)
for (yy = 1.0; nn != 0; nn >>= 1, ww *= ww)
if (nn & 1) yy *= ww;
return (n > 0) ? yy : 1.0/yy;
return (n > 0) ? yy : 1.0 / yy;
}
// optimized version of (sin(x)/x)**n with n being a _positive_ integer
/* Fast inline version of (sin(x)/x)^n as used by PPPM kspace styles
*
* This is an optimized function to compute (sin(x)/x)^n as frequently used by PPPM.
*
* \param n argument (integer). Expected to be positive.
* \return value of (sin(x)/x)^n */
KOKKOS_INLINE_FUNCTION
static double powsinxx(const double &x, int n) {
double yy,ww;
static double powsinxx(const double &x, int n)
{
double yy, ww;
if (x == 0.0) return 1.0;
ww = sin(x)/x;
ww = sin(x) / x;
for (yy = 1.0; n != 0; n >>= 1, ww *=ww)
for (yy = 1.0; n != 0; n >>= 1, ww *= ww)
if (n & 1) yy *= ww;
return yy;
}
}
}
} // namespace MathSpecialKokkos
} // namespace LAMMPS_NS
#endif

View File

@ -3,179 +3,148 @@
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void
MEAMKokkos<DeviceType>::meam_dens_final(int nlocal, int eflag_either, int eflag_global, int eflag_atom, double* eng_vdwl,
typename ArrayTypes<DeviceType>::t_efloat_1d eatom, int ntype, typename AT::t_int_1d_randomread type, typename AT::t_int_1d_randomread fmap, int& errorflag)
MEAMKokkos<DeviceType>::meam_dens_final(int nlocal, int eflag_either, int eflag_global, int eflag_atom,
typename ArrayTypes<DeviceType>::t_efloat_1d eatom, int ntype, typename AT::t_int_1d type, typename AT::t_int_1d d_map, typename AT::t_int_2d d_scale, int& errorflag, EV_FLOAT &ev_all)
{
EV_FLOAT ev;
ev.evdwl = *eng_vdwl;
this->eflag_either = eflag_either;
this->eflag_global = eflag_global;
this->eflag_atom = eflag_atom;
this->d_eatom = eatom;
this->ntype = ntype;
this->type = type;
this->fmap = fmap;
this->d_map = d_map;
this->d_scale = d_scale;
Kokkos::deep_copy(d_errorflag,0);
// Complete the calculation of density
copymode = 1;
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagMEAMDensFinal>(0,nlocal),*this,ev);
*eng_vdwl = ev.evdwl;
ev_all.evdwl =+ ev.evdwl;
copymode = 0;
auto h_errorflag = Kokkos::create_mirror_view_and_copy(LMPHostType(),d_errorflag);
errorflag = h_errorflag();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void MEAMKokkos<DeviceType>::operator()(TagMEAMDensFinal, const int &i, EV_FLOAT& ev) const {
lattice_t elti;
int m;
int errorflag;
F_FLOAT rhob, G, dG, Gbar, dGbar, gam, shp[3], Z;
F_FLOAT B, denom, rho_bkgd;
F_FLOAT denom, rho_bkgd, Fl;
double scaleii;
// may or may not be legal
elti = static_cast<lattice_t>(fmap[type[i]]);
int elti = d_map[type[i]];
if (elti >= 0) {
scaleii = d_scale(type[i],type[i]);
d_rho1[i] = 0.0;
d_rho2[i] = -1.0 / 3.0 * d_arho2b[i] * d_arho2b[i];
d_rho3[i] = 0.0;
for (m = 0; m < 3; m++) {
d_rho1[i] = d_rho1[i] + d_arho1(i,m) * d_arho1(i,m);
d_rho3[i] = d_rho3[i] - 3.0 / 5.0 * d_arho3b(i,m) * d_arho3b(i,m);
}
for (m = 0; m < 6; m++) {
d_rho2[i] = d_rho2[i] + v2D[m] * d_arho2(i,m) * d_arho2(i,m);
}
for (m = 0; m < 10; m++) {
d_rho3[i] = d_rho3[i] + v3D[m] * d_arho3(i,m) * d_arho3(i,m);
for (int m = 0; m < 3; m++) {
d_rho1[i] += d_arho1(i,m) * d_arho1(i,m);
d_rho3[i] -= 3.0 / 5.0 * d_arho3b(i,m) * d_arho3b(i,m);
}
for (int m = 0; m < 6; m++)
d_rho2[i] += v2D[m] * d_arho2(i,m) * d_arho2(i,m);
for (int m = 0; m < 10; m++)
d_rho3[i] += v3D[m] * d_arho3(i,m) * d_arho3(i,m);
if (d_rho0[i] > 0.0) {
if (this->ialloy == 1) {
if (ialloy == 1) {
d_t_ave(i,0) = fdiv_zero_kk(d_t_ave(i,0), d_tsq_ave(i,0));
d_t_ave(i,1) = fdiv_zero_kk(d_t_ave(i,1), d_tsq_ave(i,1));
d_t_ave(i,2) = fdiv_zero_kk(d_t_ave(i,2), d_tsq_ave(i,2));
} else if (this->ialloy == 2) {
d_t_ave(i,0) = this->t1_meam[elti];
d_t_ave(i,1) = this->t2_meam[elti];
d_t_ave(i,2) = this->t3_meam[elti];
} else if (ialloy == 2) {
d_t_ave(i,0) = t1_meam[elti];
d_t_ave(i,1) = t2_meam[elti];
d_t_ave(i,2) = t3_meam[elti];
} else {
d_t_ave(i,0) = d_t_ave(i,0) / d_rho0[i];
d_t_ave(i,1) = d_t_ave(i,1) / d_rho0[i];
d_t_ave(i,2) = d_t_ave(i,2) / d_rho0[i];
d_t_ave(i,0) /= d_rho0[i];
d_t_ave(i,1) /= d_rho0[i];
d_t_ave(i,2) /= d_rho0[i];
}
}
d_gamma[i] = d_t_ave(i,0) * d_rho1[i] + d_t_ave(i,1) * d_rho2[i] + d_t_ave(i,2) * d_rho3[i];
if (d_rho0[i] > 0.0) {
d_gamma[i] = d_gamma[i] / (d_rho0[i] * d_rho0[i]);
}
if (d_rho0[i] > 0.0)
d_gamma[i] /= (d_rho0[i] * d_rho0[i]);
// need to double check, the analogous function is
// Z = get_Zij(this->lattce_meam[elti][elti]); in the non-KOKKOS version?
Z = get_Zij(elti);
Z = get_Zij(lattce_meam[elti][elti]);
G = G_gam(d_gamma[i], this->ibar_meam[elti], errorflag);
if (errorflag != 0)
{
//char str[128];
//sprintf(str,"MEAMKokkos library error %d",errorflag);
//error->one(FLERR,str);
G = G_gam(d_gamma[i], ibar_meam[elti], d_errorflag());
if (d_errorflag() != 0)
return;
}
get_shpfcn(this->lattce_meam[elti][elti], shp);
if (this->ibar_meam[elti] <= 0) {
get_shpfcn(lattce_meam[elti][elti], stheta_meam[elti][elti], ctheta_meam[elti][elti], shp);
if (ibar_meam[elti] <= 0) {
Gbar = 1.0;
dGbar = 0.0;
} else {
if (this->mix_ref_t == 1) {
if (mix_ref_t == 1)
gam = (d_t_ave(i,0) * shp[0] + d_t_ave(i,1) * shp[1] + d_t_ave(i,2) * shp[2]) / (Z * Z);
} else {
gam = (this->t1_meam[elti] * shp[0] + this->t2_meam[elti] * shp[1] + this->t3_meam[elti] * shp[2]) /
else
gam = (t1_meam[elti] * shp[0] + t2_meam[elti] * shp[1] + t3_meam[elti] * shp[2]) /
(Z * Z);
}
Gbar = G_gam(gam, this->ibar_meam[elti], errorflag);
Gbar = G_gam(gam, ibar_meam[elti], d_errorflag());
}
d_rho[i] = d_rho0[i] * G;
if (this->mix_ref_t == 1) {
if (this->ibar_meam[elti] <= 0) {
if (mix_ref_t == 1) {
if (ibar_meam[elti] <= 0) {
Gbar = 1.0;
dGbar = 0.0;
} else {
gam = (d_t_ave(i,0) * shp[0] + d_t_ave(i,1) * shp[1] + d_t_ave(i,2) * shp[2]) / (Z * Z);
Gbar = dG_gam(gam, this->ibar_meam[elti], dGbar);
Gbar = dG_gam(gam, ibar_meam[elti], dGbar);
}
rho_bkgd = this->rho0_meam[elti] * Z * Gbar;
rho_bkgd = rho0_meam[elti] * Z * Gbar;
} else {
if (this->bkgd_dyn == 1) {
rho_bkgd = this->rho0_meam[elti] * Z;
} else {
rho_bkgd = this->rho_ref_meam[elti];
}
if (bkgd_dyn == 1)
rho_bkgd = rho0_meam[elti] * Z;
else
rho_bkgd = rho_ref_meam[elti];
}
rhob = d_rho[i] / rho_bkgd;
denom = 1.0 / rho_bkgd;
G = dG_gam(d_gamma[i], this->ibar_meam[elti], dG);
G = dG_gam(d_gamma[i], ibar_meam[elti], dG);
d_dgamma1[i] = (G - 2 * dG * d_gamma[i]) * denom;
if (!iszero_kk(d_rho0[i])) {
if (!iszero_kk(d_rho0[i]))
d_dgamma2[i] = (dG / d_rho0[i]) * denom;
} else {
else
d_dgamma2[i] = 0.0;
}
// dgamma3 is nonzero only if we are using the "mixed" rule for
// computing t in the reference system (which is not correct, but
// included for backward compatibility
if (this->mix_ref_t == 1) {
if (mix_ref_t == 1)
d_dgamma3[i] = d_rho0[i] * G * dGbar / (Gbar * Z * Z) * denom;
} else {
else
d_dgamma3[i] = 0.0;
}
B = this->A_meam[elti] * this->Ec_meam[elti][elti];
Fl = embedding(A_meam[elti], Ec_meam[elti][elti], rhob, d_frhop[i]);
if (!iszero_kk(rhob)) {
if (this->emb_lin_neg == 1 && rhob <= 0) {
d_frhop[i] = -B;
} else {
d_frhop[i] = B * (log(rhob) + 1.0);
if (eflag_either) {
Fl *= scaleii;
if (eflag_global) {
ev.evdwl += Fl;
}
if (eflag_either != 0) {
if (eflag_global != 0) {
if (this->emb_lin_neg == 1 && rhob <= 0) {
//*eng_vdwl = *eng_vdwl - B * rhob;
ev.evdwl = ev.evdwl - B * rhob;
} else {
//*eng_vdwl = *eng_vdwl + B * rhob * log(rhob);
ev.evdwl = ev.evdwl + B * rhob * log(rhob);
if (eflag_atom) {
d_eatom[i] += Fl;
}
}
if (eflag_atom != 0) {
if (this->emb_lin_neg == 1 && rhob <= 0) {
d_eatom[i] = d_eatom[i] - B * rhob;
} else {
d_eatom[i] = d_eatom[i] + B * rhob * log(rhob);
}
}
}
} else {
if (this->emb_lin_neg == 1) {
d_frhop[i] = -B;
} else {
d_frhop[i] = B;
}
}
}
if (errorflag)
{
//char str[128];
//sprintf(str,"MEAMKokkos library error %d",errorflag);
//error->one(FLERR,str);
}
}

View File

@ -1,29 +1,31 @@
#include "meam_kokkos.h"
#include "math_special.h"
#include "mpi.h"
#include "math_special_kokkos.h"
using namespace LAMMPS_NS;
using namespace MathSpecialKokkos;
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void MEAMKokkos<DeviceType>::operator()(TagMEAMDensInit<NEIGHFLAG>, const int &i, EV_FLOAT &ev) const {
void MEAMKokkos<DeviceType>::operator()(TagMEAMDensInit<NEIGHFLAG>, const int &i) const {
int ii, offsetval;
ii = d_ilist_half[i];
offsetval = d_offset[i];
// Compute screening function and derivatives
// compute screening function and derivatives
this->template getscreen<NEIGHFLAG>(ii, offsetval, x, d_numneigh_half,
d_numneigh_full, ntype, type, fmap);
d_numneigh_full, ntype, type, d_map);
// Calculate intermediate density terms to be communicated
this->template calc_rho1<NEIGHFLAG>(ii, ntype, type, fmap, x, d_numneigh_half, offsetval);
ev.evdwl += d_numneigh_half[i];
// calculate intermediate density terms to be communicated
this->template calc_rho1<NEIGHFLAG>(ii, ntype, type, d_map, x, d_numneigh_half, offsetval);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void MEAMKokkos<DeviceType>::operator()(TagMEAMInitialize, const int &i) const {
void MEAMKokkos<DeviceType>::operator()(TagMEAMZero, const int &i) const {
d_rho0[i] = 0.0;
d_arho2b[i] = 0.0;
d_arho1(i,0) = d_arho1(i,1) = d_arho1(i,2) = 0.0;
@ -36,6 +38,8 @@ void MEAMKokkos<DeviceType>::operator()(TagMEAMInitialize, const int &i) const {
d_tsq_ave(i,0) = d_tsq_ave(i,1) = d_tsq_ave(i,2) = 0.0;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void
MEAMKokkos<DeviceType>::meam_dens_setup(int atom_nmax, int nall, int n_neigh)
@ -155,20 +159,21 @@ MEAMKokkos<DeviceType>::meam_dens_setup(int atom_nmax, int nall, int n_neigh)
// zero out local arrays
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagMEAMInitialize>(0, nall),*this);
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagMEAMZero>(0, nall),*this);
copymode = 0;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void
MEAMKokkos<DeviceType>::meam_dens_init(int inum_half, int ntype, typename AT::t_int_1d_randomread type, typename AT::t_int_1d_randomread fmap, typename AT::t_x_array_randomread x, typename AT::t_int_1d_randomread d_numneigh_half, typename AT::t_int_1d_randomread d_numneigh_full,
int *fnoffset, typename AT::t_int_1d_randomread d_ilist_half, typename AT::t_neighbors_2d d_neighbors_half, typename AT::t_neighbors_2d d_neighbors_full, typename AT::t_int_1d_randomread d_offset, int neighflag)
MEAMKokkos<DeviceType>::meam_dens_init(int inum_half, int ntype, typename AT::t_int_1d type, typename AT::t_int_1d d_map, typename AT::t_x_array x, typename AT::t_int_1d d_numneigh_half, typename AT::t_int_1d d_numneigh_full,
typename AT::t_int_1d d_ilist_half, typename AT::t_neighbors_2d d_neighbors_half, typename AT::t_neighbors_2d d_neighbors_full, typename AT::t_int_1d d_offset, int neighflag, int need_dup)
{
EV_FLOAT ev;
ev.evdwl = 0;
this->ntype = ntype;
this->type = type;
this->fmap = fmap;
this->d_map = d_map;
this->x = x;
this->d_numneigh_half = d_numneigh_half;
this->d_numneigh_full = d_numneigh_full;
@ -176,102 +181,137 @@ MEAMKokkos<DeviceType>::meam_dens_init(int inum_half, int ntype, typename AT::t_
this->d_neighbors_half = d_neighbors_half;
this->d_neighbors_full = d_neighbors_full;
this->d_offset = d_offset;
if (neighflag == FULL)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagMEAMDensInit<FULL> >(0,inum_half),*this, ev);
else if (neighflag == HALF)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagMEAMDensInit<HALF> >(0,inum_half),*this, ev);
this->nlocal = nlocal;
if (need_dup) {
dup_rho0 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_rho0);
dup_arho2b = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_arho2b);
dup_arho1 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_arho1);
dup_arho2 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_arho2);
dup_arho3 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_arho3);
dup_arho3b = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_arho3b);
dup_t_ave = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_t_ave);
dup_tsq_ave = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_tsq_ave);
} else {
ndup_rho0 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_rho0);
ndup_arho2b = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_arho2b);
ndup_arho1 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_arho1);
ndup_arho2 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_arho2);
ndup_arho3 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_arho3);
ndup_arho3b = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_arho3b);
ndup_t_ave = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_t_ave);
ndup_tsq_ave = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_tsq_ave);
}
copymode = 1;
if (neighflag == HALF)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagMEAMDensInit<HALF> >(0,inum_half),*this);
else if (neighflag == HALFTHREAD)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagMEAMDensInit<HALFTHREAD> >(0,inum_half),*this, ev);
*fnoffset = (int)ev.evdwl;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagMEAMDensInit<HALFTHREAD> >(0,inum_half),*this);
copymode = 0;
if (need_dup) {
Kokkos::Experimental::contribute(d_rho0, dup_rho0);
Kokkos::Experimental::contribute(d_arho2b, dup_arho2b);
Kokkos::Experimental::contribute(d_arho1, dup_arho1);
Kokkos::Experimental::contribute(d_arho2, dup_arho2);
Kokkos::Experimental::contribute(d_arho3, dup_arho3);
Kokkos::Experimental::contribute(d_arho3b, dup_arho3b);
Kokkos::Experimental::contribute(d_t_ave, dup_t_ave);
Kokkos::Experimental::contribute(d_tsq_ave, dup_tsq_ave);
// free duplicated memory
dup_rho0 = decltype(dup_rho0)();
dup_arho2b = decltype(dup_arho2b)();
dup_arho1 = decltype(dup_arho1)();
dup_arho2 = decltype(dup_arho2)();
dup_arho3 = decltype(dup_arho3)();
dup_arho3b = decltype(dup_arho3b)();
dup_t_ave = decltype(dup_t_ave)();
dup_tsq_ave = decltype(dup_tsq_ave)();
}
}
// ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void
MEAMKokkos<DeviceType>::getscreen(int i, int offset, typename AT::t_x_array_randomread x, typename AT::t_int_1d_randomread numneigh_half,
typename AT::t_int_1d_randomread numneigh_full, int /*ntype*/, typename AT::t_int_1d_randomread type, typename AT::t_int_1d_randomread fmap)
MEAMKokkos<DeviceType>::getscreen(int i, int offset, typename AT::t_x_array x, typename AT::t_int_1d d_numneigh_half,
typename AT::t_int_1d d_numneigh_full, int /*ntype*/, typename AT::t_int_1d type, typename AT::t_int_1d d_map)
const {
int jn, j, kn, k;
int elti, eltj, eltk;
X_FLOAT xitmp, yitmp, zitmp, delxij, delyij, delzij;
F_FLOAT rij2, rij;
X_FLOAT xjtmp, yjtmp, zjtmp, delxik, delyik, delzik;
F_FLOAT rik2 /*,rik*/;
X_FLOAT xktmp, yktmp, zktmp, delxjk, delyjk, delzjk;
F_FLOAT rjk2 /*,rjk*/;
X_FLOAT xik, xjk;
F_FLOAT sij, fcij, sfcij, dfcij, sikj, dfikj, cikj;
F_FLOAT Cmin, Cmax, delc, /*ebound,*/ a, coef1, coef2;
F_FLOAT dCikj;
F_FLOAT rnorm, fc, dfc, drinv;
drinv = 1.0 / this->delr_meam;
elti = fmap[type[i]];
const double drinv = 1.0 / delr_meam;
const int elti = d_map[type[i]];
if (elti < 0) return;
xitmp = x(i,0);
yitmp = x(i,1);
zitmp = x(i,2);
const double xitmp = x(i,0);
const double yitmp = x(i,1);
const double zitmp = x(i,2);
for (jn = 0; jn < numneigh_half[i]; jn++) {
//j = firstneigh[jn];
j = d_neighbors_half(i,jn);
for (int jn = 0; jn < d_numneigh_half[i]; jn++) {
const int j = d_neighbors_half(i,jn);
eltj = fmap[type[j]];
const int eltj = d_map[type[j]];
if (eltj < 0) continue;
// First compute screening function itself, sij
xjtmp = x(j,0);
yjtmp = x(j,1);
zjtmp = x(j,2);
delxij = xjtmp - xitmp;
delyij = yjtmp - yitmp;
delzij = zjtmp - zitmp;
const double xjtmp = x(j,0);
const double yjtmp = x(j,1);
const double zjtmp = x(j,2);
const double delxij = xjtmp - xitmp;
const double delyij = yjtmp - yitmp;
const double delzij = zjtmp - zitmp;
rij2 = delxij * delxij + delyij * delyij + delzij * delzij;
rij = sqrt(rij2);
const double rij2 = delxij * delxij + delyij * delyij + delzij * delzij;
double rbound = this->ebound_meam[elti][eltj] * rij2;
if (rij > this->rc_meam) {
fcij = 0.0;
dfcij = 0.0;
sij = 0.0;
} else {
rnorm = (this->rc_meam - rij) * drinv;
sij = 1.0;
if (rij2 > cutforcesq) {
d_dscrfcn[offset+jn] = 0.0;
d_scrfcn[offset+jn] = 0.0;
d_fcpair[offset+jn] = 0.0;
continue;
}
// Now compute derivatives
const double rbound = ebound_meam[elti][eltj] * rij2;
const double rij = sqrt(rij2);
const double rnorm = (cutforce - rij) * drinv;
double sij = 1.0;
// if rjk2 > ebound*rijsq, atom k is definitely outside the ellipse
for (kn = 0; kn < numneigh_full[i]; kn++) {
//k = firstneigh_full[kn];
k = d_neighbors_full(i,kn);
eltk = fmap[type[k]];
if (eltk < 0) continue;
for (int kn = 0; kn < d_numneigh_full[i]; kn++) {
int k = d_neighbors_full(i,kn);
if (k == j) continue;
int eltk = d_map[type[k]];
if (eltk < 0) continue;
delxjk = x(k,0) - xjtmp;
delyjk = x(k,1) - yjtmp;
delzjk = x(k,2) - zjtmp;
rjk2 = delxjk * delxjk + delyjk * delyjk + delzjk * delzjk;
const double xktmp = x(k,0);
const double yktmp = x(k,1);
const double zktmp = x(k,2);
const double delxjk = xktmp - xjtmp;
const double delyjk = yktmp - yjtmp;
const double delzjk = zktmp - zjtmp;
const double rjk2 = delxjk * delxjk + delyjk * delyjk + delzjk * delzjk;
if (rjk2 > rbound) continue;
delxik = x(k,0) - xitmp;
delyik = x(k,1) - yitmp;
delzik = x(k,2) - zitmp;
rik2 = delxik * delxik + delyik * delyik + delzik * delzik;
const double delxik = xktmp - xitmp;
const double delyik = yktmp - yitmp;
const double delzik = zktmp - zitmp;
const double rik2 = delxik * delxik + delyik * delyik + delzik * delzik;
if (rik2 > rbound) continue;
xik = rik2 / rij2;
xjk = rjk2 / rij2;
a = 1 - (xik - xjk) * (xik - xjk);
const double xik = rik2 / rij2;
const double xjk = rjk2 / rij2;
const double a = 1 - (xik - xjk) * (xik - xjk);
// if a < 0, then ellipse equation doesn't describe this case and
// atom k can't possibly screen i-j
if (a <= 0.0) continue;
cikj = (2.0 * (xik + xjk) + a - 2.0) / a;
Cmax = this->Cmax_meam[elti][eltj][eltk];
Cmin = this->Cmin_meam[elti][eltj][eltk];
double cikj = (2.0 * (xik + xjk) + a - 2.0) / a;
const double Cmax = Cmax_meam[elti][eltj][eltk];
const double Cmin = Cmin_meam[elti][eltj][eltk];
double sikj;
if (cikj >= Cmax) continue;
// note that cikj may be slightly negative (within numerical
// tolerance) if atoms are colinear, so don't reject that case here
@ -280,60 +320,50 @@ const {
sij = 0.0;
break;
} else {
delc = Cmax - Cmin;
const double delc = Cmax - Cmin;
cikj = (cikj - Cmin) / delc;
sikj = fcut(cikj);
}
sij *= sikj;
}
fc = dfcut(rnorm, dfc);
fcij = fc;
dfcij = dfc * drinv;
}
double dfc;
const double fc = dfcut(rnorm, dfc);
const double fcij = fc;
const double dfcij = dfc * drinv;
// Now compute derivatives
d_dscrfcn[offset+jn] = 0.0;
sfcij = sij * fcij;
if (iszero_kk(sfcij) || iszero_kk(sfcij - 1.0))
{
d_scrfcn[offset+jn] = sij;
d_fcpair[offset+jn] = fcij;
continue;
//goto LABEL_100;
}
for (kn = 0; kn < numneigh_full[i]; kn++) {
//k = firstneigh_full[kn];
k = d_neighbors_full(i,kn);
const double sfcij = sij * fcij;
if (!iszero_kk(sfcij) && !isone_kk(sfcij)) {
for (int kn = 0; kn < d_numneigh_full[i]; kn++) {
const int k = d_neighbors_full(i,kn);
if (k == j) continue;
eltk = fmap[type[k]];
const int eltk = d_map[type[k]];
if (eltk < 0) continue;
xktmp = x(k,0);
yktmp = x(k,1);
zktmp = x(k,2);
delxjk = xktmp - xjtmp;
delyjk = yktmp - yjtmp;
delzjk = zktmp - zjtmp;
rjk2 = delxjk * delxjk + delyjk * delyjk + delzjk * delzjk;
const double delxjk = x(k,0) - xjtmp;
const double delyjk = x(k,1) - yjtmp;
const double delzjk = x(k,2) - zjtmp;
const double rjk2 = delxjk * delxjk + delyjk * delyjk + delzjk * delzjk;
if (rjk2 > rbound) continue;
delxik = xktmp - xitmp;
delyik = yktmp - yitmp;
delzik = zktmp - zitmp;
rik2 = delxik * delxik + delyik * delyik + delzik * delzik;
const double delxik = x(k,0) - xitmp;
const double delyik = x(k,1) - yitmp;
const double delzik = x(k,2) - zitmp;
const double rik2 = delxik * delxik + delyik * delyik + delzik * delzik;
if (rik2 > rbound) continue;
xik = rik2 / rij2;
xjk = rjk2 / rij2;
a = 1 - (xik - xjk) * (xik - xjk);
const double xik = rik2 / rij2;
const double xjk = rjk2 / rij2;
const double a = 1 - (xik - xjk) * (xik - xjk);
// if a < 0, then ellipse equation doesn't describe this case and
// atom k can't possibly screen i-j
if (a <= 0.0) continue;
cikj = (2.0 * (xik + xjk) + a - 2.0) / a;
Cmax = this->Cmax_meam[elti][eltj][eltk];
Cmin = this->Cmin_meam[elti][eltj][eltk];
double cikj = (2.0 * (xik + xjk) + a - 2.0) / a;
const double Cmax = Cmax_meam[elti][eltj][eltk];
const double Cmin = Cmin_meam[elti][eltj][eltk];
if (cikj >= Cmax) {
continue;
// Note that cikj may be slightly negative (within numerical
@ -344,130 +374,132 @@ const {
// Note that we never have 0<cikj<Cmin here, else sij=0
// (rejected above)
} else {
delc = Cmax - Cmin;
const double delc = Cmax - Cmin;
cikj = (cikj - Cmin) / delc;
sikj = dfcut(cikj, dfikj);
coef1 = dfikj / (delc * sikj);
dCikj = dCfunc(rij2, rik2, rjk2);
d_dscrfcn[offset+jn] = d_dscrfcn[offset+jn] + coef1 * dCikj;
double dfikj;
const double sikj = dfcut(cikj, dfikj);
const double coef1 = dfikj / (delc * sikj);
const double dCikj = dCfunc(rij2, rik2, rjk2);
d_dscrfcn[offset+jn] += coef1 * dCikj;
}
}
coef1 = sfcij;
coef2 = sij * dfcij / rij;
const double coef1 = sfcij;
const double coef2 = sij * dfcij / rij;
d_dscrfcn[offset+jn] = d_dscrfcn[offset+jn] * coef1 - coef2;
}
//LABEL_100:
d_scrfcn[offset+jn] = sij;
d_fcpair[offset+jn] = fcij;
}
}
// ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void
MEAMKokkos<DeviceType>::calc_rho1(int i, int /*ntype*/, typename AT::t_int_1d_randomread type, typename AT::t_int_1d_randomread fmap, typename AT::t_x_array_randomread x, typename AT::t_int_1d_randomread numneigh,
MEAMKokkos<DeviceType>::calc_rho1(int i, int /*ntype*/, typename AT::t_int_1d type, typename AT::t_int_1d d_map, typename AT::t_x_array x, typename AT::t_int_1d d_numneigh,
int offset) const
{
int jn, j, m, n, p, elti, eltj;
int nv2, nv3;
X_FLOAT xtmp, ytmp, ztmp, delij[3];
F_FLOAT rij2, rij, sij;
F_FLOAT ai, aj, rhoa0j, rhoa1j, rhoa2j, rhoa3j, A1j, A2j, A3j;
// double G,Gbar,gam,shp[3+1];
F_FLOAT ro0i, ro0j;
F_FLOAT rhoa0i, rhoa1i, rhoa2i, rhoa3i, A1i, A2i, A3i;
// The rho0, etc. arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
// Likely to do: replace with duplicated view for OpenMP, atomic view for GPU abstraction
Kokkos::View<F_FLOAT*, typename DAT::t_ffloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_rho0 = d_rho0;
Kokkos::View<F_FLOAT*, typename DAT::t_ffloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_arho2b = d_arho2b;
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_t_ave = d_t_ave;
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_tsq_ave = d_tsq_ave;
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_arho1 = d_arho1;
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_arho2 = d_arho2;
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_arho3 = d_arho3;
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_arho3b = d_arho3b;
auto v_rho0 = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_rho0),decltype(ndup_rho0)>::get(dup_rho0,ndup_rho0);
auto a_rho0 = v_rho0.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
auto v_arho2b = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_arho2b),decltype(ndup_arho2b)>::get(dup_arho2b,ndup_arho2b);
auto a_arho2b = v_arho2b.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
auto v_arho1 = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_arho1),decltype(ndup_arho1)>::get(dup_arho1,ndup_arho1);
auto a_arho1 = v_arho1.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
auto v_arho2 = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_arho2),decltype(ndup_arho2)>::get(dup_arho2,ndup_arho2);
auto a_arho2 = v_arho2.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
auto v_arho3 = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_arho3),decltype(ndup_arho3)>::get(dup_arho3,ndup_arho3);
auto a_arho3 = v_arho3.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
auto v_arho3b = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_arho3b),decltype(ndup_arho3b)>::get(dup_arho3b,ndup_arho3b);
auto a_arho3b = v_arho3b.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
auto v_t_ave = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_t_ave),decltype(ndup_t_ave)>::get(dup_t_ave,ndup_t_ave);
auto a_t_ave = v_t_ave.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
auto v_tsq_ave = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_tsq_ave),decltype(ndup_tsq_ave)>::get(dup_tsq_ave,ndup_tsq_ave);
auto a_tsq_ave = v_tsq_ave.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
elti = fmap[type[i]];
xtmp = x(i,0);
ytmp = x(i,1);
ztmp = x(i,2);
for (jn = 0; jn < numneigh[i]; jn++) {
const int elti = d_map[type[i]];
const double xtmp = x(i,0);
const double ytmp = x(i,1);
const double ztmp = x(i,2);
for (int jn = 0; jn < d_numneigh[i]; jn++) {
if (!iszero_kk(d_scrfcn[offset+jn])) {
//j = firstneigh[jn];
j = d_neighbors_half(i,jn);
sij = d_scrfcn[offset+jn] * d_fcpair[offset+jn];
const int j = d_neighbors_half(i,jn);
const double sij = d_scrfcn[offset+jn] * d_fcpair[offset+jn];
double delij[3];
delij[0] = x(j,0) - xtmp;
delij[1] = x(j,1) - ytmp;
delij[2] = x(j,2) - ztmp;
rij2 = delij[0] * delij[0] + delij[1] * delij[1] + delij[2] * delij[2];
if (rij2 < this->cutforcesq) {
eltj = fmap[type[j]];
rij = sqrt(rij2);
ai = rij / this->re_meam[elti][elti] - 1.0;
aj = rij / this->re_meam[eltj][eltj] - 1.0;
ro0i = this->rho0_meam[elti];
ro0j = this->rho0_meam[eltj];
rhoa0j = ro0j * MathSpecialKokkos::fm_exp(-this->beta0_meam[eltj] * aj) * sij;
rhoa1j = ro0j * MathSpecialKokkos::fm_exp(-this->beta1_meam[eltj] * aj) * sij;
rhoa2j = ro0j * MathSpecialKokkos::fm_exp(-this->beta2_meam[eltj] * aj) * sij;
rhoa3j = ro0j * MathSpecialKokkos::fm_exp(-this->beta3_meam[eltj] * aj) * sij;
rhoa0i = ro0i * MathSpecialKokkos::fm_exp(-this->beta0_meam[elti] * ai) * sij;
rhoa1i = ro0i * MathSpecialKokkos::fm_exp(-this->beta1_meam[elti] * ai) * sij;
rhoa2i = ro0i * MathSpecialKokkos::fm_exp(-this->beta2_meam[elti] * ai) * sij;
rhoa3i = ro0i * MathSpecialKokkos::fm_exp(-this->beta3_meam[elti] * ai) * sij;
if (this->ialloy == 1) {
rhoa1j = rhoa1j * this->t1_meam[eltj];
rhoa2j = rhoa2j * this->t2_meam[eltj];
rhoa3j = rhoa3j * this->t3_meam[eltj];
rhoa1i = rhoa1i * this->t1_meam[elti];
rhoa2i = rhoa2i * this->t2_meam[elti];
rhoa3i = rhoa3i * this->t3_meam[elti];
const double rij2 = delij[0] * delij[0] + delij[1] * delij[1] + delij[2] * delij[2];
if (rij2 < cutforcesq) {
const int eltj = d_map[type[j]];
const double rij = sqrt(rij2);
const double ai = rij / re_meam[elti][elti] - 1.0;
const double aj = rij / re_meam[eltj][eltj] - 1.0;
const double ro0i = rho0_meam[elti];
const double ro0j = rho0_meam[eltj];
const double rhoa0j = ro0j * MathSpecialKokkos::fm_exp(-beta0_meam[eltj] * aj) * sij;
double rhoa1j = ro0j * MathSpecialKokkos::fm_exp(-beta1_meam[eltj] * aj) * sij;
double rhoa2j = ro0j * MathSpecialKokkos::fm_exp(-beta2_meam[eltj] * aj) * sij;
double rhoa3j = ro0j * MathSpecialKokkos::fm_exp(-beta3_meam[eltj] * aj) * sij;
const double rhoa0i = ro0i * MathSpecialKokkos::fm_exp(-beta0_meam[elti] * ai) * sij;
double rhoa1i = ro0i * MathSpecialKokkos::fm_exp(-beta1_meam[elti] * ai) * sij;
double rhoa2i = ro0i * MathSpecialKokkos::fm_exp(-beta2_meam[elti] * ai) * sij;
double rhoa3i = ro0i * MathSpecialKokkos::fm_exp(-beta3_meam[elti] * ai) * sij;
if (ialloy == 1) {
rhoa1j *= t1_meam[eltj];
rhoa2j *= t2_meam[eltj];
rhoa3j *= t3_meam[eltj];
rhoa1i *= t1_meam[elti];
rhoa2i *= t2_meam[elti];
rhoa3i *= t3_meam[elti];
}
a_rho0[i] += rhoa0j;
a_rho0[j] += rhoa0i;
// For ialloy = 2, use single-element value (not average)
if (this->ialloy != 2) {
a_t_ave(i,0) = a_t_ave(i,0) + this->t1_meam[eltj] * rhoa0j;
a_t_ave(i,1) = a_t_ave(i,1) + this->t2_meam[eltj] * rhoa0j;
a_t_ave(i,2) = a_t_ave(i,2) + this->t3_meam[eltj] * rhoa0j;
a_t_ave(j,0) = a_t_ave(j,0) + this->t1_meam[elti] * rhoa0i;
a_t_ave(j,1) = a_t_ave(j,1) + this->t2_meam[elti] * rhoa0i;
a_t_ave(j,2) = a_t_ave(j,2) + this->t3_meam[elti] * rhoa0i;
if (ialloy != 2) {
a_t_ave(i,0) += t1_meam[eltj] * rhoa0j;
a_t_ave(i,1) += t2_meam[eltj] * rhoa0j;
a_t_ave(i,2) += t3_meam[eltj] * rhoa0j;
a_t_ave(j,0) += t1_meam[elti] * rhoa0i;
a_t_ave(j,1) += t2_meam[elti] * rhoa0i;
a_t_ave(j,2) += t3_meam[elti] * rhoa0i;
}
if (this->ialloy == 1) {
a_tsq_ave(i,0) = a_tsq_ave(i,0) + this->t1_meam[eltj] * this->t1_meam[eltj] * rhoa0j;
a_tsq_ave(i,1) = a_tsq_ave(i,1) + this->t2_meam[eltj] * this->t2_meam[eltj] * rhoa0j;
a_tsq_ave(i,2) = a_tsq_ave(i,2) + this->t3_meam[eltj] * this->t3_meam[eltj] * rhoa0j;
a_tsq_ave(j,0) = a_tsq_ave(j,0) + this->t1_meam[elti] * this->t1_meam[elti] * rhoa0i;
a_tsq_ave(j,1) = a_tsq_ave(j,1) + this->t2_meam[elti] * this->t2_meam[elti] * rhoa0i;
a_tsq_ave(j,2) = a_tsq_ave(j,2) + this->t3_meam[elti] * this->t3_meam[elti] * rhoa0i;
if (ialloy == 1) {
a_tsq_ave(i,0) += t1_meam[eltj] * t1_meam[eltj] * rhoa0j;
a_tsq_ave(i,1) += t2_meam[eltj] * t2_meam[eltj] * rhoa0j;
a_tsq_ave(i,2) += t3_meam[eltj] * t3_meam[eltj] * rhoa0j;
a_tsq_ave(j,0) += t1_meam[elti] * t1_meam[elti] * rhoa0i;
a_tsq_ave(j,1) += t2_meam[elti] * t2_meam[elti] * rhoa0i;
a_tsq_ave(j,2) += t3_meam[elti] * t3_meam[elti] * rhoa0i;
}
a_arho2b[i] += rhoa2j;
a_arho2b[j] += rhoa2i;
A1j = rhoa1j / rij;
A2j = rhoa2j / rij2;
A3j = rhoa3j / (rij2 * rij);
A1i = rhoa1i / rij;
A2i = rhoa2i / rij2;
A3i = rhoa3i / (rij2 * rij);
nv2 = 0;
nv3 = 0;
for (m = 0; m < 3; m++) {
const double A1j = rhoa1j / rij;
const double A2j = rhoa2j / rij2;
const double A3j = rhoa3j / (rij2 * rij);
const double A1i = rhoa1i / rij;
const double A2i = rhoa2i / rij2;
const double A3i = rhoa3i / (rij2 * rij);
int nv2 = 0;
int nv3 = 0;
for (int m = 0; m < 3; m++) {
a_arho1(i,m) += A1j * delij[m];
a_arho1(j,m) += (-A1i * delij[m]);
a_arho1(j,m) += -A1i * delij[m];
a_arho3b(i,m) += rhoa3j * delij[m] / rij;
a_arho3b(j,m) += (- rhoa3i * delij[m] / rij);
for (n = m; n < 3; n++) {
a_arho3b(j,m) += -rhoa3i * delij[m] / rij;
for (int n = m; n < 3; n++) {
a_arho2(i,nv2) += A2j * delij[m] * delij[n];
a_arho2(j,nv2) += A2i * delij[m] * delij[n];
nv2 = nv2 + 1;
for (p = n; p < 3; p++) {
nv2++;
for (int p = n; p < 3; p++) {
a_arho3(i,nv3) += A3j * delij[m] * delij[n] * delij[p];
a_arho3(j,nv3) += (- A3i * delij[m] * delij[n] * delij[p]);
nv3 = nv3 + 1;
a_arho3(j,nv3) += -A3i * delij[m] * delij[n] * delij[p];
nv3++;
}
}
}
@ -476,12 +508,14 @@ MEAMKokkos<DeviceType>::calc_rho1(int i, int /*ntype*/, typename AT::t_int_1d_ra
}
}
/* ---------------------------------------------------------------------- */
//Cutoff function and derivative
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
double MEAMKokkos<DeviceType>::dfcut(const double xi, double& dfc) const {
double a, a3, a4, a1m4;
double MEAMKokkos<DeviceType>::dfcut(const double xi, double& dfc) const
{
if (xi >= 1.0) {
dfc = 0.0;
return 1.0;
@ -489,66 +523,66 @@ double MEAMKokkos<DeviceType>::dfcut(const double xi, double& dfc) const {
dfc = 0.0;
return 0.0;
} else {
a = 1.0 - xi;
a3 = a * a * a;
a4 = a * a3;
a1m4 = 1.0-a4;
const double a = 1.0 - xi;
const double a3 = a * a * a;
const double a4 = a * a3;
const double a1m4 = 1.0 - a4;
dfc = 8 * a1m4 * a3;
return a1m4*a1m4;
}
}
}
//-----------------------------------------------------------------------------
// // Derivative of Cikj w.r.t. rij
// // Inputs: rij,rij2,rik2,rjk2
// //
//
// Derivative of Cikj w.r.t. rij
// Inputs: rij,rij2,rik2,rjk2
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
double MEAMKokkos<DeviceType>::dCfunc(const double rij2, const double rik2, const double rjk2) const
{
double rij4, a, asq, b,denom;
rij4 = rij2 * rij2;
a = rik2 - rjk2;
b = rik2 + rjk2;
asq = a*a;
denom = rij4 - asq;
const double rij4 = rij2 * rij2;
const double a = rik2 - rjk2;
const double b = rik2 + rjk2;
const double asq = a*a;
double denom = rij4 - asq;
denom = denom * denom;
return -4 * (-2 * rij2 * asq + rij4 * b + asq * b) / denom;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void MEAMKokkos<DeviceType>::dCfunc2(const double rij2, const double rik2, const double rjk2, double& dCikj1, double& dCikj2) const
{
double rij4, rik4, rjk4, a, denom;
rij4 = rij2 * rij2;
rik4 = rik2 * rik2;
rjk4 = rjk2 * rjk2;
a = rik2 - rjk2;
denom = rij4 - a * a;
const double rij4 = rij2 * rij2;
const double rik4 = rik2 * rik2;
const double rjk4 = rjk2 * rjk2;
const double a = rik2 - rjk2;
double denom = rij4 - a * a;
denom = denom * denom;
dCikj1 = 4 * rij2 * (rij4 + rik4 + 2 * rik2 * rjk2 - 3 * rjk4 - 2 * rij2 * a) / denom;
dCikj2 = 4 * rij2 * (rij4 - 3 * rik4 + 2 * rik2 * rjk2 + rjk4 + 2 * rij2 * a) / denom;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
double MEAMKokkos<DeviceType>::fcut(const double xi) const{
double MEAMKokkos<DeviceType>::fcut(const double xi) const
{
double a;
if (xi >= 1.0)
return 1.0;
else if (xi <= 0.0)
return 0.0;
else {
// ( 1.d0 - (1.d0 - xi)**4 )**2, but with better codegen
a = 1.0 - xi;
a *= a; a *= a;
a = 1.0 - a;
return a * a;
}
}
}

View File

@ -5,24 +5,25 @@
using namespace LAMMPS_NS;
using namespace MathSpecialKokkos;
template<class DeviceType>
void
MEAMKokkos<DeviceType>::meam_force(int inum_half, int eflag_either, int eflag_global, int eflag_atom, int vflag_atom, double* eng_vdwl,
typename ArrayTypes<DeviceType>::t_efloat_1d eatom, int ntype, typename AT::t_int_1d_randomread type, typename AT::t_int_1d_randomread fmap, typename AT::t_x_array_randomread x, typename AT::t_int_1d_randomread numneigh,
typename AT::t_int_1d_randomread numneigh_full, typename AT::t_f_array f, typename ArrayTypes<DeviceType>::t_virial_array vatom, typename AT::t_int_1d_randomread d_ilist_half, typename AT::t_int_1d_randomread d_offset, typename AT::t_neighbors_2d d_neighbors_half, typename AT::t_neighbors_2d d_neighbors_full, int neighflag)
MEAMKokkos<DeviceType>::meam_force(int inum_half, int eflag_global, int eflag_atom, int vflag_global, int vflag_atom,
typename ArrayTypes<DeviceType>::t_efloat_1d eatom, int ntype, typename AT::t_int_1d type, typename AT::t_int_1d d_map, typename AT::t_x_array x, typename AT::t_int_1d numneigh,
typename AT::t_int_1d numneigh_full, typename AT::t_f_array f, typename ArrayTypes<DeviceType>::t_virial_array vatom, typename AT::t_int_1d d_ilist_half, typename AT::t_int_1d d_offset, typename AT::t_neighbors_2d d_neighbors_half, typename AT::t_neighbors_2d d_neighbors_full, int neighflag, int need_dup, EV_FLOAT &ev_all)
{
EV_FLOAT ev;
ev.evdwl = *eng_vdwl;
this->eflag_either = eflag_either;
this->eflag_global = eflag_global;
this->eflag_atom = eflag_atom;
this->vflag_global = vflag_global;
this->vflag_atom = vflag_atom;
eflag_either = eflag_atom || eflag_global;
vflag_either = vflag_atom || vflag_global;
this->d_eatom = eatom;
this->ntype = ntype;
this->type = type;
this->fmap = fmap;
this->d_map = d_map;
this->x = x;
this->d_numneigh_half = numneigh;
this->d_numneigh_full = numneigh_full;
@ -33,67 +34,90 @@ MEAMKokkos<DeviceType>::meam_force(int inum_half, int eflag_either, int eflag_gl
this->d_ilist_half = d_ilist_half;
this->d_offset = d_offset;
if (neighflag == FULL)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagMEAMforce<FULL>>(0,inum_half),*this,ev);
else if (neighflag == HALF)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagMEAMforce<HALF>>(0,inum_half),*this,ev);
if (need_dup) {
dup_f = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(f);
if (eflag_atom) dup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_eatom);
if (vflag_atom) dup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_vatom);
} else {
ndup_f = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(f);
if (eflag_atom) ndup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_eatom);
if (vflag_atom) ndup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_vatom);
}
copymode = 1;
if (neighflag == HALF)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagMEAMForce<HALF>>(0,inum_half),*this,ev);
else if (neighflag == HALFTHREAD)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagMEAMforce<HALFTHREAD>>(0,inum_half),*this,ev);
*eng_vdwl = ev.evdwl;
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagMEAMForce<HALFTHREAD>>(0,inum_half),*this,ev);
ev_all += ev;
copymode = 0;
if (need_dup) {
Kokkos::Experimental::contribute(f, dup_f);
if (eflag_atom) Kokkos::Experimental::contribute(d_eatom, dup_eatom);
if (vflag_atom) Kokkos::Experimental::contribute(d_vatom, dup_vatom);
// free duplicated memory
dup_f = decltype(dup_f)();
if (eflag_atom) dup_eatom = decltype(dup_eatom)();
if (vflag_atom) dup_vatom = decltype(dup_vatom)();
}
}
template<class DeviceType>
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void
MEAMKokkos<DeviceType>::operator()(TagMEAMforce<NEIGHFLAG>, const int &ii, EV_FLOAT& ev) const {
MEAMKokkos<DeviceType>::operator()(TagMEAMForce<NEIGHFLAG>, const int &ii, EV_FLOAT& ev) const {
int i, j, jn, k, kn, kk, m, n, p, q;
int nv2, nv3, elti, eltj, eltk, ind;
X_FLOAT xitmp, yitmp, zitmp, delij[3];
F_FLOAT rij2, rij, rij3;
F_FLOAT v[6], fi[3], fj[3];
F_FLOAT third, sixth;
F_FLOAT pp, dUdrij, dUdsij, dUdrijm[3], force, forcem;
F_FLOAT r, recip, phi, phip;
F_FLOAT sij;
F_FLOAT a1, a1i, a1j, a2, a2i, a2j;
F_FLOAT a3i, a3j;
F_FLOAT shpi[3], shpj[3];
F_FLOAT ai, aj, ro0i, ro0j, invrei, invrej;
F_FLOAT rhoa0j, drhoa0j, rhoa0i, drhoa0i;
F_FLOAT rhoa1j, drhoa1j, rhoa1i, drhoa1i;
F_FLOAT rhoa2j, drhoa2j, rhoa2i, drhoa2i;
F_FLOAT a3, a3a, rhoa3j, drhoa3j, rhoa3i, drhoa3i;
F_FLOAT drho0dr1, drho0dr2, drho0ds1, drho0ds2;
F_FLOAT drho1dr1, drho1dr2, drho1ds1, drho1ds2;
F_FLOAT drho1drm1[3], drho1drm2[3];
F_FLOAT drho2dr1, drho2dr2, drho2ds1, drho2ds2;
F_FLOAT drho2drm1[3], drho2drm2[3];
F_FLOAT drho3dr1, drho3dr2, drho3ds1, drho3ds2;
F_FLOAT drho3drm1[3], drho3drm2[3];
F_FLOAT dt1dr1, dt1dr2, dt1ds1, dt1ds2;
F_FLOAT dt2dr1, dt2dr2, dt2ds1, dt2ds2;
F_FLOAT dt3dr1, dt3dr2, dt3ds1, dt3ds2;
F_FLOAT drhodr1, drhodr2, drhods1, drhods2, drhodrm1[3], drhodrm2[3];
F_FLOAT arg;
F_FLOAT arg1i1, arg1j1, arg1i2, arg1j2, arg1i3, arg1j3, arg3i3, arg3j3;
F_FLOAT dsij1, dsij2, force1, force2;
F_FLOAT t1i, t2i, t3i, t1j, t2j, t3j;
double rij2, rij, rij3;
double v[6], fi[3], fj[3];
double third, sixth;
double pp, dUdrij, dUdsij, dUdrijm[3], force, forcem;
double recip, phi, phip;
double sij;
double a1, a1i, a1j, a2, a2i, a2j;
double a3i, a3j;
double shpi[3], shpj[3];
double ai, aj, ro0i, ro0j, invrei, invrej;
double rhoa0j, drhoa0j, rhoa0i, drhoa0i;
double rhoa1j, drhoa1j, rhoa1i, drhoa1i;
double rhoa2j, drhoa2j, rhoa2i, drhoa2i;
double a3, a3a, rhoa3j, drhoa3j, rhoa3i, drhoa3i;
double drho0dr1, drho0dr2, drho0ds1, drho0ds2;
double drho1dr1, drho1dr2, drho1ds1, drho1ds2;
double drho1drm1[3], drho1drm2[3];
double drho2dr1, drho2dr2, drho2ds1, drho2ds2;
double drho2drm1[3], drho2drm2[3];
double drho3dr1, drho3dr2, drho3ds1, drho3ds2;
double drho3drm1[3], drho3drm2[3];
double dt1dr1, dt1dr2, dt1ds1, dt1ds2;
double dt2dr1, dt2dr2, dt2ds1, dt2ds2;
double dt3dr1, dt3dr2, dt3ds1, dt3ds2;
double drhodr1, drhodr2, drhods1, drhods2, drhodrm1[3], drhodrm2[3];
double arg;
double arg1i1, arg1j1, arg1i2, arg1j2, arg1i3, arg1j3, arg3i3, arg3j3;
double dsij1, dsij2, force1, force2;
double t1i, t2i, t3i, t1j, t2j, t3j;
int fnoffset;
// To do: update with duplication for OpenMP, atomic for GPUs
Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_eatom = d_eatom;
Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_vatom = d_vatom;
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
// The f, etc. arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
auto v_f = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
auto a_f = v_f.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
auto v_eatom = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom);
auto a_eatom = v_eatom.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
auto v_vatom = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
auto a_vatom = v_vatom.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
i = d_ilist_half[ii];
fnoffset = d_offset[i];
third = 1.0 / 3.0;
sixth = 1.0 / 6.0;
// Compute forces atom i
elti = fmap[type[i]];
elti = d_map[type[i]];
if (elti < 0) return;
xitmp = x(i,0);
@ -102,9 +126,8 @@ MEAMKokkos<DeviceType>::operator()(TagMEAMforce<NEIGHFLAG>, const int &ii, EV_FL
// Treat each pair
for (jn = 0; jn < d_numneigh_half[i]; jn++) {
//j = firstneigh[jn];
j = d_neighbors_half(i,jn);
eltj = fmap[type[j]];
eltj = d_map[type[j]];
if (!iszero_kk(d_scrfcn[fnoffset + jn]) && eltj >= 0) {
@ -113,29 +136,29 @@ MEAMKokkos<DeviceType>::operator()(TagMEAMforce<NEIGHFLAG>, const int &ii, EV_FL
delij[1] = x(j,1) - yitmp;
delij[2] = x(j,2) - zitmp;
rij2 = delij[0] * delij[0] + delij[1] * delij[1] + delij[2] * delij[2];
if (rij2 < this->cutforcesq) {
if (rij2 < cutforcesq) {
rij = sqrt(rij2);
r = rij;
recip = 1.0 / rij;
// Compute phi and phip
ind = this->eltind[elti][eltj];
pp = rij * this->rdrar;
ind = eltind[elti][eltj];
pp = rij * rdrar;
kk = (int)pp;
kk = (kk <= (this->nrar - 2))?kk:this->nrar - 2;
kk = (kk <= (nrar - 2))?kk:nrar - 2;
pp = pp - kk;
pp = (pp <= 1.0)?pp:1.0;
phi = ((this->d_phirar3(ind,kk) * pp + this->d_phirar2(ind,kk)) * pp + this->d_phirar1(ind,kk)) * pp +
this->d_phirar(ind,kk);
phip = (this->d_phirar6(ind,kk) * pp + this->d_phirar5(ind,kk)) * pp + this->d_phirar4(ind,kk);
recip = 1.0 / r;
phi = ((d_phirar3(ind,kk) * pp + d_phirar2(ind,kk)) * pp + d_phirar1(ind,kk)) * pp +
d_phirar(ind,kk);
phip = (d_phirar6(ind,kk) * pp + d_phirar5(ind,kk)) * pp + d_phirar4(ind,kk);
if (eflag_either != 0) {
if (eflag_global != 0)
//*eng_vdwl = *eng_vdwl + phi * sij;
ev.evdwl = ev.evdwl + phi * sij;
if (eflag_atom != 0) {
a_eatom[i] += (0.5 * phi * sij);
a_eatom[j] += (0.5 * phi * sij);
if (eflag_either) {
double scaleij = d_scale(type[i],type[i]);
double phi_sc = phi * scaleij;
if (eflag_global)
ev.evdwl += phi_sc * sij;
if (eflag_atom) {
a_eatom[i] += 0.5 * phi * sij;
a_eatom[j] += 0.5 * phi * sij;
}
}
@ -143,30 +166,30 @@ MEAMKokkos<DeviceType>::operator()(TagMEAMforce<NEIGHFLAG>, const int &ii, EV_FL
// write(1,*) "force_meamf: phip: ",phip
// Compute pair densities and derivatives
invrei = 1.0 / this->re_meam[elti][elti];
invrei = 1.0 / re_meam[elti][elti];
ai = rij * invrei - 1.0;
ro0i = this->rho0_meam[elti];
rhoa0i = ro0i * MathSpecialKokkos::fm_exp(-this->beta0_meam[elti] * ai);
drhoa0i = -this->beta0_meam[elti] * invrei * rhoa0i;
rhoa1i = ro0i * MathSpecialKokkos::fm_exp(-this->beta1_meam[elti] * ai);
drhoa1i = -this->beta1_meam[elti] * invrei * rhoa1i;
rhoa2i = ro0i * MathSpecialKokkos::fm_exp(-this->beta2_meam[elti] * ai);
drhoa2i = -this->beta2_meam[elti] * invrei * rhoa2i;
rhoa3i = ro0i * MathSpecialKokkos::fm_exp(-this->beta3_meam[elti] * ai);
drhoa3i = -this->beta3_meam[elti] * invrei * rhoa3i;
ro0i = rho0_meam[elti];
rhoa0i = ro0i * MathSpecialKokkos::fm_exp(-beta0_meam[elti] * ai);
drhoa0i = -beta0_meam[elti] * invrei * rhoa0i;
rhoa1i = ro0i * MathSpecialKokkos::fm_exp(-beta1_meam[elti] * ai);
drhoa1i = -beta1_meam[elti] * invrei * rhoa1i;
rhoa2i = ro0i * MathSpecialKokkos::fm_exp(-beta2_meam[elti] * ai);
drhoa2i = -beta2_meam[elti] * invrei * rhoa2i;
rhoa3i = ro0i * MathSpecialKokkos::fm_exp(-beta3_meam[elti] * ai);
drhoa3i = -beta3_meam[elti] * invrei * rhoa3i;
if (elti != eltj) {
invrej = 1.0 / this->re_meam[eltj][eltj];
invrej = 1.0 / re_meam[eltj][eltj];
aj = rij * invrej - 1.0;
ro0j = this->rho0_meam[eltj];
rhoa0j = ro0j * MathSpecialKokkos::fm_exp(-this->beta0_meam[eltj] * aj);
drhoa0j = -this->beta0_meam[eltj] * invrej * rhoa0j;
rhoa1j = ro0j * MathSpecialKokkos::fm_exp(-this->beta1_meam[eltj] * aj);
drhoa1j = -this->beta1_meam[eltj] * invrej * rhoa1j;
rhoa2j = ro0j * MathSpecialKokkos::fm_exp(-this->beta2_meam[eltj] * aj);
drhoa2j = -this->beta2_meam[eltj] * invrej * rhoa2j;
rhoa3j = ro0j * MathSpecialKokkos::fm_exp(-this->beta3_meam[eltj] * aj);
drhoa3j = -this->beta3_meam[eltj] * invrej * rhoa3j;
ro0j = rho0_meam[eltj];
rhoa0j = ro0j * MathSpecialKokkos::fm_exp(-beta0_meam[eltj] * aj);
drhoa0j = -beta0_meam[eltj] * invrej * rhoa0j;
rhoa1j = ro0j * MathSpecialKokkos::fm_exp(-beta1_meam[eltj] * aj);
drhoa1j = -beta1_meam[eltj] * invrej * rhoa1j;
rhoa2j = ro0j * MathSpecialKokkos::fm_exp(-beta2_meam[eltj] * aj);
drhoa2j = -beta2_meam[eltj] * invrej * rhoa2j;
rhoa3j = ro0j * MathSpecialKokkos::fm_exp(-beta3_meam[eltj] * aj);
drhoa3j = -beta3_meam[eltj] * invrej * rhoa3j;
} else {
rhoa0j = rhoa0i;
drhoa0j = drhoa0i;
@ -178,14 +201,14 @@ MEAMKokkos<DeviceType>::operator()(TagMEAMforce<NEIGHFLAG>, const int &ii, EV_FL
drhoa3j = drhoa3i;
}
const double t1mi = this->t1_meam[elti];
const double t2mi = this->t2_meam[elti];
const double t3mi = this->t3_meam[elti];
const double t1mj = this->t1_meam[eltj];
const double t2mj = this->t2_meam[eltj];
const double t3mj = this->t3_meam[eltj];
const double t1mi = t1_meam[elti];
const double t2mi = t2_meam[elti];
const double t3mi = t3_meam[elti];
const double t1mj = t1_meam[eltj];
const double t2mj = t2_meam[eltj];
const double t3mj = t3_meam[eltj];
if (this->ialloy == 1) {
if (ialloy == 1) {
rhoa1j *= t1mj;
rhoa2j *= t2mj;
rhoa3j *= t3mj;
@ -213,12 +236,12 @@ MEAMKokkos<DeviceType>::operator()(TagMEAMforce<NEIGHFLAG>, const int &ii, EV_FL
for (n = 0; n < 3; n++) {
for (p = n; p < 3; p++) {
for (q = p; q < 3; q++) {
arg = delij[n] * delij[p] * delij[q] * this->v3D[nv3];
arg = delij[n] * delij[p] * delij[q] * v3D[nv3];
arg1i3 = arg1i3 + d_arho3(i,nv3) * arg;
arg1j3 = arg1j3 - d_arho3(j,nv3) * arg;
nv3 = nv3 + 1;
}
arg = delij[n] * delij[p] * this->v2D[nv2];
arg = delij[n] * delij[p] * v2D[nv2];
arg1i2 = arg1i2 + d_arho2(i,nv2) * arg;
arg1j2 = arg1j2 + d_arho2(j,nv2) * arg;
nv2 = nv2 + 1;
@ -252,8 +275,8 @@ MEAMKokkos<DeviceType>::operator()(TagMEAMforce<NEIGHFLAG>, const int &ii, EV_FL
drho2drm1[m] = 0.0;
drho2drm2[m] = 0.0;
for (n = 0; n < 3; n++) {
drho2drm1[m] = drho2drm1[m] + d_arho2(i,this->vind2D[m][n]) * delij[n];
drho2drm2[m] = drho2drm2[m] - d_arho2(j,this->vind2D[m][n]) * delij[n];
drho2drm1[m] = drho2drm1[m] + d_arho2(i,vind2D[m][n]) * delij[n];
drho2drm2[m] = drho2drm2[m] - d_arho2(j,vind2D[m][n]) * delij[n];
}
drho2drm1[m] = a2 * rhoa2j * drho2drm1[m];
drho2drm2[m] = -a2 * rhoa2i * drho2drm2[m];
@ -273,9 +296,9 @@ MEAMKokkos<DeviceType>::operator()(TagMEAMforce<NEIGHFLAG>, const int &ii, EV_FL
nv2 = 0;
for (n = 0; n < 3; n++) {
for (p = n; p < 3; p++) {
arg = delij[n] * delij[p] * this->v2D[nv2];
drho3drm1[m] = drho3drm1[m] + d_arho3(i,this->vind3D[m][n][p]) * arg;
drho3drm2[m] = drho3drm2[m] + d_arho3(j,this->vind3D[m][n][p]) * arg;
arg = delij[n] * delij[p] * v2D[nv2];
drho3drm1[m] = drho3drm1[m] + d_arho3(i,vind3D[m][n][p]) * arg;
drho3drm2[m] = drho3drm2[m] + d_arho3(j,vind3D[m][n][p]) * arg;
nv2 = nv2 + 1;
}
}
@ -291,7 +314,7 @@ MEAMKokkos<DeviceType>::operator()(TagMEAMforce<NEIGHFLAG>, const int &ii, EV_FL
t2j = d_t_ave(j,1);
t3j = d_t_ave(j,2);
if (this->ialloy == 1) {
if (ialloy == 1) {
a1i = fdiv_zero_kk(drhoa0j * sij, d_tsq_ave(i,0));
a1j = fdiv_zero_kk(drhoa0i * sij, d_tsq_ave(j,0));
@ -307,7 +330,7 @@ MEAMKokkos<DeviceType>::operator()(TagMEAMforce<NEIGHFLAG>, const int &ii, EV_FL
dt3dr1 = a3i * (t3mj - t3i * MathSpecialKokkos::square(t3mj));
dt3dr2 = a3j * (t3mi - t3j * MathSpecialKokkos::square(t3mi));
} else if (this->ialloy == 2) {
} else if (ialloy == 2) {
dt1dr1 = 0.0;
dt1dr2 = 0.0;
@ -334,21 +357,23 @@ MEAMKokkos<DeviceType>::operator()(TagMEAMforce<NEIGHFLAG>, const int &ii, EV_FL
}
// Compute derivatives of total density wrt rij, sij and rij(3)
get_shpfcn(this->lattce_meam[elti][elti], shpi);
get_shpfcn(this->lattce_meam[eltj][eltj], shpj);
drhodr1 = dgamma1[i] * drho0dr1 +
dgamma2[i] * (dt1dr1 * d_rho1[i] + t1i * drho1dr1 + dt2dr1 * d_rho2[i] + t2i * drho2dr1 +
get_shpfcn(lattce_meam[elti][elti], stheta_meam[elti][elti], ctheta_meam[elti][elti], shpi);
get_shpfcn(lattce_meam[eltj][eltj], stheta_meam[elti][elti], ctheta_meam[elti][elti], shpj);
drhodr1 = d_dgamma1[i] * drho0dr1 +
d_dgamma2[i] * (dt1dr1 * d_rho1[i] + t1i * drho1dr1 + dt2dr1 * d_rho2[i] + t2i * drho2dr1 +
dt3dr1 * d_rho3[i] + t3i * drho3dr1) -
dgamma3[i] * (shpi[0] * dt1dr1 + shpi[1] * dt2dr1 + shpi[2] * dt3dr1);
drhodr2 = dgamma1[j] * drho0dr2 +
dgamma2[j] * (dt1dr2 * d_rho1[j] + t1j * drho1dr2 + dt2dr2 * d_rho2[j] + t2j * drho2dr2 +
d_dgamma3[i] * (shpi[0] * dt1dr1 + shpi[1] * dt2dr1 + shpi[2] * dt3dr1);
drhodr2 = d_dgamma1[j] * drho0dr2 +
d_dgamma2[j] * (dt1dr2 * d_rho1[j] + t1j * drho1dr2 + dt2dr2 * d_rho2[j] + t2j * drho2dr2 +
dt3dr2 * d_rho3[j] + t3j * drho3dr2) -
dgamma3[j] * (shpj[0] * dt1dr2 + shpj[1] * dt2dr2 + shpj[2] * dt3dr2);
d_dgamma3[j] * (shpj[0] * dt1dr2 + shpj[1] * dt2dr2 + shpj[2] * dt3dr2);
for (m = 0; m < 3; m++) {
drhodrm1[m] = 0.0;
drhodrm2[m] = 0.0;
drhodrm1[m] = dgamma2[i] * (t1i * drho1drm1[m] + t2i * drho2drm1[m] + t3i * drho3drm1[m]);
drhodrm2[m] = dgamma2[j] * (t1j * drho1drm2[m] + t2j * drho2drm2[m] + t3j * drho3drm2[m]);
drhodrm1[m] = d_dgamma2[i] * (t1i * drho1drm1[m] + t2i * drho2drm1[m] + t3i * drho3drm1[m]);
drhodrm2[m] = d_dgamma2[j] * (t1j * drho1drm2[m] + t2j * drho2drm2[m] + t3j * drho3drm2[m]);
}
// Compute derivatives wrt sij, but only if necessary
@ -366,7 +391,7 @@ MEAMKokkos<DeviceType>::operator()(TagMEAMforce<NEIGHFLAG>, const int &ii, EV_FL
drho3ds1 = a3 * rhoa3j * arg1i3 - a3a * rhoa3j * arg3i3;
drho3ds2 = a3 * rhoa3i * arg1j3 - a3a * rhoa3i * arg3j3;
if (this->ialloy == 1) {
if (ialloy == 1) {
a1i = fdiv_zero_kk(rhoa0j, d_tsq_ave(i,0));
a1j = fdiv_zero_kk(rhoa0i, d_tsq_ave(j,0));
a2i = fdiv_zero_kk(rhoa0j, d_tsq_ave(i,1));
@ -381,7 +406,7 @@ MEAMKokkos<DeviceType>::operator()(TagMEAMforce<NEIGHFLAG>, const int &ii, EV_FL
dt3ds1 = a3i * (t3mj - t3i * MathSpecialKokkos::square(t3mj));
dt3ds2 = a3j * (t3mi - t3j * MathSpecialKokkos::square(t3mi));
} else if (this->ialloy == 2) {
} else if (ialloy == 2) {
dt1ds1 = 0.0;
dt1ds2 = 0.0;
@ -432,12 +457,12 @@ MEAMKokkos<DeviceType>::operator()(TagMEAMforce<NEIGHFLAG>, const int &ii, EV_FL
for (m = 0; m < 3; m++) {
forcem = delij[m] * force + dUdrijm[m];
a_f(i,m) += forcem;
a_f(j,m) -= -forcem;
a_f(j,m) -= forcem;
}
// Tabulate per-atom virial as symmetrized stress tensor
if (vflag_atom != 0) {
if (vflag_either) {
fi[0] = delij[0] * force + dUdrijm[0];
fi[1] = delij[1] * force + dUdrijm[1];
fi[2] = delij[2] * force + dUdrijm[2];
@ -448,36 +473,41 @@ MEAMKokkos<DeviceType>::operator()(TagMEAMforce<NEIGHFLAG>, const int &ii, EV_FL
v[4] = -0.25 * (delij[0] * fi[2] + delij[2] * fi[0]);
v[5] = -0.25 * (delij[1] * fi[2] + delij[2] * fi[1]);
if (vflag_global)
for (m = 0; m < 6; m++)
ev.v[m] += 2.0*v[m];
if (vflag_atom) {
for (m = 0; m < 6; m++) {
a_vatom(i,m) += v[m];
a_vatom(j,m) += v[m];
}
}
}
// Now compute forces on other atoms k due to change in sij
if (iszero_kk(sij) || iszero_kk(sij - 1.0)) continue; //: cont jn loop
if (iszero_kk(sij) || isone_kk(sij)) continue; //: cont jn loop
double dxik(0), dyik(0), dzik(0);
double dxjk(0), dyjk(0), dzjk(0);
for (kn = 0; kn < d_numneigh_full[i]; kn++) {
//k = firstneigh_full[kn];
k = d_neighbors_full(i,kn);
eltk = fmap[type[k]];
eltk = d_map[type[k]];
if (k != j && eltk >= 0) {
double xik, xjk, cikj, sikj, dfc, a;
double dCikj1, dCikj2;
double delc, rik2, rjk2;
sij = d_scrfcn[jn+fnoffset] * d_fcpair[jn+fnoffset];
const double Cmax = this->Cmax_meam[elti][eltj][eltk];
const double Cmin = this->Cmin_meam[elti][eltj][eltk];
const double Cmax = Cmax_meam[elti][eltj][eltk];
const double Cmin = Cmin_meam[elti][eltj][eltk];
dsij1 = 0.0;
dsij2 = 0.0;
if (!iszero_kk(sij) && !iszero_kk(sij - 1.0)) {
const double rbound = rij2 * this->ebound_meam[elti][eltj];
if (!iszero_kk(sij) && !isone_kk(sij)) {
const double rbound = rij2 * ebound_meam[elti][eltj];
delc = Cmax - Cmin;
dxjk = x(k,0) - x(j,0);
dyjk = x(k,1) - x(j,1);
@ -523,7 +553,7 @@ MEAMKokkos<DeviceType>::operator()(TagMEAMforce<NEIGHFLAG>, const int &ii, EV_FL
// Tabulate per-atom virial as symmetrized stress tensor
if (vflag_atom != 0) {
if (vflag_either) {
fi[0] = force1 * dxik;
fi[1] = force1 * dyik;
fi[2] = force1 * dzik;
@ -537,6 +567,11 @@ MEAMKokkos<DeviceType>::operator()(TagMEAMforce<NEIGHFLAG>, const int &ii, EV_FL
v[4] = -sixth * (dxik * fi[2] + dxjk * fj[2] + dzik * fi[0] + dzjk * fj[0]);
v[5] = -sixth * (dyik * fi[2] + dyjk * fj[2] + dzik * fi[1] + dzjk * fj[1]);
if (vflag_global)
for (m = 0; m < 6; m++)
ev.v[m] += 3.0*v[m];
if (vflag_atom) {
for (m = 0; m < 6; m++) {
a_vatom(i,m) += v[m];
a_vatom(j,m) += v[m];
@ -545,6 +580,7 @@ MEAMKokkos<DeviceType>::operator()(TagMEAMforce<NEIGHFLAG>, const int &ii, EV_FL
}
}
}
}
// end of k loop
}
}

View File

@ -12,7 +12,7 @@
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Sebastian Hütter (OvGU)
Contributing author: Naga Vydyanathan (NVIDIA)
------------------------------------------------------------------------- */
#include "math_special_kokkos.h"
@ -51,7 +51,7 @@ double MEAMKokkos<DeviceType>::G_gam(const double gamma, const int ibar, int &er
case 1:
return MathSpecialKokkos::fm_exp(gamma / 2.0);
case 3:
return 2.0 / (1.0 + exp(-gamma));
return 2.0 / (1.0 + MathSpecialKokkos::fm_exp(-gamma));
case -5:
if ((1.0 + gamma) >= 0) {
return sqrt(1.0 + gamma);
@ -144,6 +144,30 @@ double MEAMKokkos<DeviceType>::zbl(const double r, const int z1, const int z2) c
return result;
}
//-----------------------------------------------------------------------------
// Compute embedding function F(rhobar) and derivative F'(rhobar), eqn I.5
//
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
double MEAMKokkos<DeviceType>::embedding(const double A, const double Ec, const double rhobar, double& dF) const
{
const double AEc = A * Ec;
if (rhobar > 0.0) {
const double lrb = log(rhobar);
dF = AEc * (1.0 + lrb);
return AEc * rhobar * lrb;
} else {
if (emb_lin_neg == 0) {
dF = 0.0;
return 0.0;
} else {
dF = - AEc;
return - AEc * rhobar;
}
}
}
//-----------------------------------------------------------------------------
// Compute Rose energy function, I.16
//
@ -178,7 +202,7 @@ double MEAMKokkos<DeviceType>::erose(const double r, const double re, const doub
//
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void MEAMKokkos<DeviceType>::get_shpfcn(const lattice_t latt, double (&s)[3]) const
void MEAMKokkos<DeviceType>::get_shpfcn(const lattice_t latt, const double sthe, const double cthe, double (&s)[3]) const
{
switch (latt) {
case FCC:
@ -194,7 +218,9 @@ void MEAMKokkos<DeviceType>::get_shpfcn(const lattice_t latt, double (&s)[3]) co
s[1] = 0.0;
s[2] = 1.0 / 3.0;
break;
case CH4: // CH4 actually needs shape factor for diamond for C, dimer for H
case DIA:
case DIA3:
s[0] = 0.0;
s[1] = 0.0;
s[2] = 32.0 / 9.0;
@ -202,8 +228,20 @@ void MEAMKokkos<DeviceType>::get_shpfcn(const lattice_t latt, double (&s)[3]) co
case DIM:
s[0] = 1.0;
s[1] = 2.0 / 3.0;
// s(3) = 1.d0
s[2] = 0.40;
// s(4) = 1.d0 // this should be 0.4 unless (1-legendre) is multiplied in the density calc.
s[2] = 0.40; // this is (1-legendre) where legendre = 0.6 in dynamo is accounted.
break;
case LIN: // linear, theta being 180
s[0] = 0.0;
s[1] = 8.0 / 3.0; // 4*(co**4 + si**4 - 1.0/3.0) in zig become 4*(1-1/3)
s[2] = 0.0;
break;
case ZIG: //zig-zag
case TRI: //trimer e.g. H2O
s[0] = 4.0*pow(cthe,2);
s[1] = 4.0*(pow(cthe,4) + pow(sthe,4) - 1.0/3.0);
s[2] = 4.0*(pow(cthe,2) * (3*pow(sthe,4) + pow(cthe,4)));
s[2] = s[2] - 0.6*s[0]; //legend in dyn, 0.6 is default value.
break;
default:
s[0] = 0.0;
@ -225,102 +263,26 @@ int MEAMKokkos<DeviceType>::get_Zij(const lattice_t latt) const
return 8;
case HCP:
return 12;
case B1:
return 6;
case DIA:
case DIA3:
return 4;
case DIM:
return 1;
case B1:
return 6;
case C11:
return 10;
case L12:
return 12;
case B2:
return 8;
case CH4: // DYNAMO currently implemented this way while it needs two Z values, 4 and 1
return 4;
case LIN:
case ZIG:
case TRI:
return 2;
// call error('Lattice not defined in get_Zij.')
}
return 0;
}
//-----------------------------------------------------------------------------
// Number of second neighbors for the reference structure
// a = distance ratio R1/R2
// S = second neighbor screening function
//
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
int MEAMKokkos<DeviceType>::get_Zij2(const lattice_t latt, const double cmin, const double cmax, double& a, double& S) const
{
double C, x, sijk;
int Zij2 = 0, numscr = 0;
switch (latt) {
case FCC:
Zij2 = 6;
a = sqrt(2.0);
numscr = 4;
break;
case BCC:
Zij2 = 6;
a = 2.0 / sqrt(3.0);
numscr = 4;
break;
case HCP:
Zij2 = 6;
a = sqrt(2.0);
numscr = 4;
break;
case B1:
Zij2 = 12;
a = sqrt(2.0);
numscr = 2;
break;
case DIA:
Zij2 = 12;
a = sqrt(8.0 / 3.0);
numscr = 1;
if (cmin < 0.500001) {
// call error('can not do 2NN MEAM for dia')
}
break;
case DIM:
// this really shouldn't be allowed; make sure screening is zero
a = 1.0;
S = 0.0;
return 0;
case L12:
Zij2 = 6;
a = sqrt(2.0);
numscr = 4;
break;
case B2:
Zij2 = 6;
a = 2.0 / sqrt(3.0);
numscr = 4;
break;
case C11:
// unsupported lattice flag C11 in get_Zij
break;
default:
// unknown lattic flag in get Zij
// call error('Lattice not defined in get_Zij.')
break;
}
// Compute screening for each first neighbor
C = 4.0 / (a * a) - 1.0;
x = (C - cmin) / (cmax - cmin);
sijk = fcut(x);
// There are numscr first neighbors screening the second neighbors
S = MathSpecialKokkos::powint(sijk, numscr);
return Zij2;
}

View File

@ -12,11 +12,12 @@
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Sebastian Hütter (OvGU)
Contributing author: Naga Vydyanathan (NVIDIA), Stan Moore (SNL)
------------------------------------------------------------------------- */
#include "memory_kokkos.h"
#include "meam_kokkos.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
@ -24,21 +25,15 @@ using namespace LAMMPS_NS;
template<class DeviceType>
MEAMKokkos<DeviceType>::MEAMKokkos(Memory *mem) : MEAM(mem)
{
d_errorflag = typename AT::t_int_scalar("meam:errorflag");
}
template<class DeviceType>
MEAMKokkos<DeviceType>::~MEAMKokkos()
{
MemoryKokkos *memoryKK = (MemoryKokkos *)memory;
if (copymode) return;
memoryKK->destroy_kokkos(k_phirar6,phirar6);
memoryKK->destroy_kokkos(k_phirar5,phirar5);
memoryKK->destroy_kokkos(k_phirar4,phirar4);
memoryKK->destroy_kokkos(k_phirar3,phirar3);
memoryKK->destroy_kokkos(k_phirar2,phirar2);
memoryKK->destroy_kokkos(k_phirar1,phirar1);
memoryKK->destroy_kokkos(k_phirar,phirar);
memoryKK->destroy_kokkos(k_phir,phir);
MemoryKokkos *memoryKK = (MemoryKokkos *)memory;
memoryKK->destroy_kokkos(k_rho,rho);
memoryKK->destroy_kokkos(k_rho0,rho0);
@ -63,10 +58,10 @@ MEAMKokkos<DeviceType>::~MEAMKokkos()
memoryKK->destroy_kokkos(k_dscrfcn,dscrfcn);
memoryKK->destroy_kokkos(k_fcpair,fcpair);
}
#include "meam_setup_done_kokkos.h"
#include "meam_funcs_kokkos.h"
#include "meam_dens_init_kokkos.h"
#include "meam_dens_final_kokkos.h"
#include "meam_force_kokkos.h"
//

View File

@ -14,68 +14,77 @@ namespace LAMMPS_NS {
struct TagMEAMDensFinal{};
template<int NEIGHFLAG>
struct TagMEAMDensInit{};
struct TagMEAMInitialize{};
struct TagMEAMZero{};
template<int NEIGHFLAG>
struct TagMEAMforce{};
struct TagMEAMForce{};
template<class DeviceType>
class MEAMKokkos : public MEAM
{
public:
public:
typedef ArrayTypes<DeviceType> AT;
typedef EV_FLOAT value_type;
MEAMKokkos(Memory* mem);
~MEAMKokkos();
~MEAMKokkos() override;
KOKKOS_INLINE_FUNCTION
void operator()(TagMEAMDensFinal, const int&, EV_FLOAT&) const;
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void operator()(TagMEAMDensInit<NEIGHFLAG>, const int&, EV_FLOAT&) const;
void operator()(TagMEAMDensInit<NEIGHFLAG>, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagMEAMInitialize, const int&) const;
void operator()(TagMEAMZero, const int&) const;
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void operator()(TagMEAMforce<NEIGHFLAG>, const int&, EV_FLOAT&) const;
protected:
//Parameters to meam_dens_init - is there a better way to do this?
int ntype;
typename AT::t_int_1d_randomread type;
typename AT::t_int_1d_randomread d_offset;
typename AT::t_int_1d_randomread fmap;
typename AT::t_x_array_randomread x;
typename AT::t_int_1d_randomread d_numneigh_half;
typename AT::t_int_1d_randomread d_numneigh_full;
void operator()(TagMEAMForce<NEIGHFLAG>, const int&, EV_FLOAT&) const;
private:
// parameters to meam_dens_init
int ntype, nlocal;
typename AT::t_int_1d type;
typename AT::t_int_1d d_offset;
typename AT::t_int_1d d_map;
typename AT::t_int_2d d_scale;
typename AT::t_x_array x;
typename AT::t_int_1d d_numneigh_half;
typename AT::t_int_1d d_numneigh_full;
typename AT::t_neighbors_2d d_neighbors_half;
typename AT::t_neighbors_2d d_neighbors_full;
typename AT::t_int_1d_randomread d_ilist_half;
typename AT::t_int_1d d_ilist_half;
typename AT::t_f_array f;
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
//Parameters to meam_dens_final - is there a better way to do this?
int eflag_either, eflag_global, eflag_atom, vflag_atom;
double *eng_vdwl;
// parameters to meam_dens_final
typename AT::t_int_scalar d_errorflag;
int eflag_either, eflag_global, eflag_atom, vflag_either, vflag_global, vflag_atom;
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
public:
void meam_dens_setup(int, int, int);
void meam_setup_done(void);
void meam_dens_init(int , int , typename AT::t_int_1d_randomread , typename AT::t_int_1d_randomread, typename AT::t_x_array_randomread, typename AT::t_int_1d_randomread,
typename AT::t_int_1d_randomread , int* , typename AT::t_int_1d_randomread, typename AT::t_neighbors_2d,typename AT::t_neighbors_2d,typename AT::t_int_1d_randomread, int );
void meam_dens_final(int , int , int , int , double* ,
typename ArrayTypes<DeviceType>::t_efloat_1d , int , typename AT::t_int_1d_randomread , typename AT::t_int_1d_randomread , int& );
void meam_force(int , int , int , int , int , double* ,
typename ArrayTypes<DeviceType>::t_efloat_1d , int , typename AT::t_int_1d_randomread , typename AT::t_int_1d_randomread , typename AT::t_x_array_randomread , typename AT::t_int_1d_randomread ,
typename AT::t_int_1d_randomread , typename AT::t_f_array , typename ArrayTypes<DeviceType>::t_virial_array ,typename AT::t_int_1d_randomread , typename AT::t_int_1d_randomread, typename AT::t_neighbors_2d, typename AT::t_neighbors_2d, int);
public:
void meam_dens_setup(int, int, int) override;
void meam_setup_done(double*) override;
void meam_dens_init(int, int, typename AT::t_int_1d, typename AT::t_int_1d, typename AT::t_x_array,
typename AT::t_int_1d, typename AT::t_int_1d, typename AT::t_int_1d,
typename AT::t_neighbors_2d, typename AT::t_neighbors_2d, typename AT::t_int_1d,
int, int);
void meam_dens_final(int, int, int, int, typename ArrayTypes<DeviceType>::t_efloat_1d,
int, typename AT::t_int_1d, typename AT::t_int_1d, typename AT::t_int_2d, int&, EV_FLOAT&);
void meam_force(int, int, int, int, int, typename ArrayTypes<DeviceType>::t_efloat_1d,
int, typename AT::t_int_1d, typename AT::t_int_1d, typename AT::t_x_array, typename AT::t_int_1d,
typename AT::t_int_1d, typename AT::t_f_array, typename ArrayTypes<DeviceType>::t_virial_array,
typename AT::t_int_1d, typename AT::t_int_1d, typename AT::t_neighbors_2d, typename AT::t_neighbors_2d,
int, int, EV_FLOAT&);
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void getscreen(int , int, typename AT::t_x_array_randomread , typename AT::t_int_1d_randomread,
typename AT::t_int_1d_randomread, int , typename AT::t_int_1d_randomread , typename AT::t_int_1d_randomread ) const;
void getscreen(int, int, typename AT::t_x_array, typename AT::t_int_1d,
typename AT::t_int_1d, int, typename AT::t_int_1d, typename AT::t_int_1d) const;
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void calc_rho1(int , int , typename AT::t_int_1d_randomread , typename AT::t_int_1d_randomread , typename AT::t_x_array_randomread , typename AT::t_int_1d_randomread, int ) const;
void calc_rho1(int, int, typename AT::t_int_1d, typename AT::t_int_1d, typename AT::t_x_array, typename AT::t_int_1d, int) const;
KOKKOS_INLINE_FUNCTION
double fcut(const double xi) const;
KOKKOS_INLINE_FUNCTION
@ -91,16 +100,17 @@ public:
KOKKOS_INLINE_FUNCTION
double zbl(const double, const int, const int) const;
KOKKOS_INLINE_FUNCTION
double embedding(const double, const double, const double, double&) const;
KOKKOS_INLINE_FUNCTION
double erose(const double, const double, const double, const double, const double, const double, const int) const;
KOKKOS_INLINE_FUNCTION
void get_shpfcn(const lattice_t latt, double (&s)[3]) const;
void get_shpfcn(const lattice_t latt, const double sthe, const double cthe,
double (&s)[3]) const;
KOKKOS_INLINE_FUNCTION
int get_Zij(const lattice_t ) const;
KOKKOS_INLINE_FUNCTION
int get_Zij2(const lattice_t, const double, const double, double&, double&) const;
public:
int get_Zij(const lattice_t) const;
public:
DAT::tdual_ffloat_1d k_rho, k_rho0, k_rho1, k_rho2, k_rho3, k_frhop;
typename ArrayTypes<DeviceType>::t_ffloat_1d d_rho, d_rho0,d_rho1, d_rho2, d_rho3, d_frhop;
typename ArrayTypes<DeviceType>::t_ffloat_1d d_rho, d_rho0, d_rho1, d_rho2, d_rho3, d_frhop;
HAT::t_ffloat_1d h_rho, h_rho0, h_rho1, h_rho2, h_rho3, h_frhop;
DAT::tdual_ffloat_1d k_gamma, k_dgamma1, k_dgamma2, k_dgamma3, k_arho2b;
typename ArrayTypes<DeviceType>::t_ffloat_1d d_gamma, d_dgamma1, d_dgamma2, d_dgamma3, d_arho2b;
@ -108,20 +118,54 @@ public:
DAT::tdual_ffloat_2d k_arho1, k_arho2, k_arho3, k_arho3b, k_t_ave, k_tsq_ave;
typename ArrayTypes<DeviceType>::t_ffloat_2d d_arho1, d_arho2, d_arho3, d_arho3b, d_t_ave, d_tsq_ave;
HAT::t_ffloat_2d h_arho1, h_arho2, h_arho3, h_arho3b, h_t_ave, h_tsq_ave;
DAT::tdual_ffloat_2d k_phir, k_phirar, k_phirar1, k_phirar2, k_phirar3, k_phirar4, k_phirar5, k_phirar6;
typename ArrayTypes<DeviceType>::t_ffloat_2d d_phir, d_phirar, d_phirar1, d_phirar2, d_phirar3, d_phirar4, d_phirar5, d_phirar6;
HAT::t_ffloat_2d h_phir, h_phirar, h_phirar1, h_phirar2, h_phirar3, h_phirar4, h_phirar5, h_phirar6;
DAT::tdual_ffloat_1d k_scrfcn, k_dscrfcn, k_fcpair;
typename ArrayTypes<DeviceType>::t_ffloat_1d d_scrfcn, d_dscrfcn, d_fcpair;
HAT::t_ffloat_1d h_scrfcn, h_dscrfcn, h_fcpair;
protected:
int need_dup;
using KKDeviceType = typename KKDevice<DeviceType>::value;
template<typename DataType, typename Layout>
using DupScatterView = KKScatterView<DataType, Layout, KKDeviceType, KKScatterSum, KKScatterDuplicated>;
template<typename DataType, typename Layout>
using NonDupScatterView = KKScatterView<DataType, Layout, KKDeviceType, KKScatterSum, KKScatterNonDuplicated>;
DupScatterView<typename decltype(d_rho0)::data_type, typename decltype(d_rho0)::array_layout> dup_rho0;
NonDupScatterView<typename decltype(d_rho0)::data_type, typename decltype(d_rho0)::array_layout> ndup_rho0;
DupScatterView<typename decltype(d_arho2b)::data_type, typename decltype(d_arho2b)::array_layout> dup_arho2b;
NonDupScatterView<typename decltype(d_arho2b)::data_type, typename decltype(d_arho2b)::array_layout> ndup_arho2b;
DupScatterView<typename decltype(d_arho1)::data_type, typename decltype(d_arho1)::array_layout> dup_arho1;
NonDupScatterView<typename decltype(d_arho1)::data_type, typename decltype(d_arho1)::array_layout> ndup_arho1;
DupScatterView<typename decltype(d_arho2)::data_type, typename decltype(d_arho2)::array_layout> dup_arho2;
NonDupScatterView<typename decltype(d_arho2)::data_type, typename decltype(d_arho2)::array_layout> ndup_arho2;
DupScatterView<typename decltype(d_arho3)::data_type, typename decltype(d_arho3)::array_layout> dup_arho3;
NonDupScatterView<typename decltype(d_arho3)::data_type, typename decltype(d_arho3)::array_layout> ndup_arho3;
DupScatterView<typename decltype(d_arho3b)::data_type, typename decltype(d_arho3b)::array_layout> dup_arho3b;
NonDupScatterView<typename decltype(d_arho3b)::data_type, typename decltype(d_arho3b)::array_layout> ndup_arho3b;
DupScatterView<typename decltype(d_t_ave)::data_type, typename decltype(d_t_ave)::array_layout> dup_t_ave;
NonDupScatterView<typename decltype(d_t_ave)::data_type, typename decltype(d_t_ave)::array_layout> ndup_t_ave;
DupScatterView<typename decltype(d_tsq_ave)::data_type, typename decltype(d_tsq_ave)::array_layout> dup_tsq_ave;
NonDupScatterView<typename decltype(d_tsq_ave)::data_type, typename decltype(d_tsq_ave)::array_layout> ndup_tsq_ave;
DupScatterView<typename decltype(f)::data_type, typename decltype(f)::array_layout> dup_f;
NonDupScatterView<typename decltype(f)::data_type, typename decltype(f)::array_layout> ndup_f;
DupScatterView<typename decltype(d_eatom)::data_type, typename decltype(d_eatom)::array_layout> dup_eatom;
NonDupScatterView<typename decltype(d_eatom)::data_type, typename decltype(d_eatom)::array_layout> ndup_eatom;
DupScatterView<typename decltype(d_vatom)::data_type, typename decltype(d_vatom)::array_layout> dup_vatom;
NonDupScatterView<typename decltype(d_vatom)::data_type, typename decltype(d_vatom)::array_layout> ndup_vatom;
};
KOKKOS_INLINE_FUNCTION
static bool iszero_kk(const double f) {
return fabs(f) < 1e-20;
}
KOKKOS_INLINE_FUNCTION
static bool isone_kk(const double f) {
return fabs(f-1.0) < 1e-20;
}
KOKKOS_INLINE_FUNCTION
static double fdiv_zero_kk(const double n, const double d) {
@ -134,9 +178,5 @@ static double fdiv_zero_kk(const double n, const double d) {
}
#include "meam_impl_kokkos.h"
//#include "meam_setup_done_kokkos.h"
//#include "meam_funcs_kokkos.h"
//#include "meam_dens_init_kokkos.h"
//#include "meam_dens_final_kokkos.h"
//#include "meam_force_kokkos.h"
#endif

View File

@ -1,40 +1,30 @@
#include "meam_kokkos.h"
template<class DeviceType>
void MEAMKokkos<DeviceType>::meam_setup_done(void)
void MEAMKokkos<DeviceType>::meam_setup_done(double* cutmax)
{
MemoryKokkos *memoryKK = (MemoryKokkos *)memory;
MEAM::meam_setup_done(cutmax);
memoryKK->destroy_kokkos(k_phirar6,phirar6);
memoryKK->destroy_kokkos(k_phirar5,phirar5);
memoryKK->destroy_kokkos(k_phirar4,phirar4);
memoryKK->destroy_kokkos(k_phirar3,phirar3);
memoryKK->destroy_kokkos(k_phirar2,phirar2);
memoryKK->destroy_kokkos(k_phirar1,phirar1);
memoryKK->destroy_kokkos(k_phirar,phirar);
memoryKK->destroy_kokkos(k_phir,phir);
MemKK::realloc_kokkos(d_phir, "pair:phir", (neltypes * (neltypes + 1)) / 2, nr);
MemKK::realloc_kokkos(d_phirar, "pair:phirar", (neltypes * (neltypes + 1)) / 2, nr);
MemKK::realloc_kokkos(d_phirar1, "pair:phirar1", (neltypes * (neltypes + 1)) / 2, nr);
MemKK::realloc_kokkos(d_phirar2, "pair:phirar2", (neltypes * (neltypes + 1)) / 2, nr);
MemKK::realloc_kokkos(d_phirar3, "pair:phirar3", (neltypes * (neltypes + 1)) / 2, nr);
MemKK::realloc_kokkos(d_phirar4, "pair:phirar4", (neltypes * (neltypes + 1)) / 2, nr);
MemKK::realloc_kokkos(d_phirar5, "pair:phirar5", (neltypes * (neltypes + 1)) / 2, nr);
MemKK::realloc_kokkos(d_phirar6, "pair:phirar6", (neltypes * (neltypes + 1)) / 2, nr);
memoryKK->create_kokkos(k_phir, phir, (neltypes * (neltypes + 1)) / 2, nr, "pair:phir");
memoryKK->create_kokkos(k_phirar, phirar, (neltypes * (neltypes + 1)) / 2, nr, "pair:phirar");
memoryKK->create_kokkos(k_phirar1, phirar1, (neltypes * (neltypes + 1)) / 2, nr, "pair:phirar1");
memoryKK->create_kokkos(k_phirar2, phirar2, (neltypes * (neltypes + 1)) / 2, nr, "pair:phirar2");
memoryKK->create_kokkos(k_phirar3, phirar3, (neltypes * (neltypes + 1)) / 2, nr, "pair:phirar3");
memoryKK->create_kokkos(k_phirar4, phirar4, (neltypes * (neltypes + 1)) / 2, nr, "pair:phirar4");
memoryKK->create_kokkos(k_phirar5, phirar5, (neltypes * (neltypes + 1)) / 2, nr, "pair:phirar5");
memoryKK->create_kokkos(k_phirar6, phirar6, (neltypes * (neltypes + 1)) / 2, nr, "pair:phirar6");
h_phir = k_phir.h_view;
h_phirar = k_phirar.h_view;
h_phirar1 = k_phirar1.h_view;
h_phirar2 = k_phirar2.h_view;
h_phirar3 = k_phirar3.h_view;
h_phirar4 = k_phirar4.h_view;
h_phirar5 = k_phirar5.h_view;
h_phirar6 = k_phirar6.h_view;
auto h_phir = Kokkos::create_mirror_view(d_phir);
auto h_phirar = Kokkos::create_mirror_view(d_phirar);
auto h_phirar1 = Kokkos::create_mirror_view(d_phirar1);
auto h_phirar2 = Kokkos::create_mirror_view(d_phirar2);
auto h_phirar3 = Kokkos::create_mirror_view(d_phirar3);
auto h_phirar4 = Kokkos::create_mirror_view(d_phirar4);
auto h_phirar5 = Kokkos::create_mirror_view(d_phirar5);
auto h_phirar6 = Kokkos::create_mirror_view(d_phirar6);
for (int i = 0; i <(neltypes * (neltypes + 1)) / 2; i++)
for(int j = 0; j < nr; j++)
{
for(int j = 0; j < nr; j++) {
h_phir(i,j) = phir[i][j];
h_phirar(i,j) = phirar[i][j];
h_phirar1(i,j) = phirar1[i][j];
@ -44,28 +34,13 @@ void MEAMKokkos<DeviceType>::meam_setup_done(void)
h_phirar5(i,j) = phirar5[i][j];
h_phirar6(i,j) = phirar6[i][j];
}
k_phir.template modify<LMPHostType>();
k_phir.template sync<DeviceType>();
d_phir = k_phir.template view<DeviceType>();
k_phirar.template modify<LMPHostType>();
k_phirar.template sync<DeviceType>();
d_phirar = k_phirar.template view<DeviceType>();
k_phirar1.template modify<LMPHostType>();
k_phirar1.template sync<DeviceType>();
d_phirar1 = k_phirar1.template view<DeviceType>();
k_phirar2.template modify<LMPHostType>();
k_phirar2.template sync<DeviceType>();
d_phirar2 = k_phirar2.template view<DeviceType>();
k_phirar3.template modify<LMPHostType>();
k_phirar3.template sync<DeviceType>();
d_phirar3 = k_phirar3.template view<DeviceType>();
k_phirar4.template modify<LMPHostType>();
k_phirar4.template sync<DeviceType>();
d_phirar4 = k_phirar4.template view<DeviceType>();
k_phirar5.template modify<LMPHostType>();
k_phirar5.template sync<DeviceType>();
d_phirar5 = k_phirar5.template view<DeviceType>();
k_phirar6.template modify<LMPHostType>();
k_phirar6.template sync<DeviceType>();
d_phirar6 = k_phirar6.template view<DeviceType>();
Kokkos::deep_copy(d_phir,h_phir);
Kokkos::deep_copy(d_phirar,h_phirar);
Kokkos::deep_copy(d_phirar1,h_phirar1);
Kokkos::deep_copy(d_phirar2,h_phirar2);
Kokkos::deep_copy(d_phirar3,h_phirar3);
Kokkos::deep_copy(d_phirar4,h_phirar4);
Kokkos::deep_copy(d_phirar5,h_phirar5);
Kokkos::deep_copy(d_phirar6,h_phirar6);
}

View File

@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
https://www.lammps.org/, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
@ -12,80 +12,61 @@
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Greg Wagner (SNL)
Contributing authors: Naga Vydyanathan (NVIDIA), Stan Moore (SNL)
------------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
//KK*
#include "meam_kokkos.h"
#include "kokkos.h"
#include "pair_kokkos.h"
//#include "pair_meamc.h"
#include "pair_meam_kokkos.h"
#include "meam_kokkos.h"
#include "atom_kokkos.h"
//*KK
#include "force.h"
#include "comm.h"
//KK*
//#include "memory.h"
#include "memory_kokkos.h"
//*KK
#include "neighbor.h"
//KK*
//#include "neigh_list.h"
#include "neigh_list_kokkos.h"
//*KK
#include "neigh_request.h"
#include "error.h"
//*KK
#include "atom_masks.h"
//*KK
#include "comm.h"
#include "error.h"
#include "force.h"
#include "kokkos.h"
#include "memory_kokkos.h"
#include "neigh_list_kokkos.h"
#include "neigh_request.h"
#include "neighbor.h"
#include <cmath>
using namespace LAMMPS_NS;
#if 0
static const int nkeywords = 21;
static const char *keywords[] = {
"Ec","alpha","rho0","delta","lattce",
"attrac","repuls","nn2","Cmin","Cmax","rc","delr",
"augt1","gsmooth_factor","re","ialloy",
"mixture_ref_t","erose_form","zbl",
"emb_lin_neg","bkgd_dyn"};
#endif
/* ---------------------------------------------------------------------- */
template<class DeviceType>
PairMEAMKokkos<DeviceType>::PairMEAMKokkos(LAMMPS *lmp) : PairMEAM(lmp)
{
respa_enable = 0;
kokkosable = 1;
reverse_comm_device = 1;
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK;
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
meam_inst_kk = new MEAMKokkos<DeviceType>(memory);
delete meam_inst;
meam_inst_kk = new MEAMKokkos<DeviceType>(memory);
meam_inst = meam_inst_kk;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
PairMEAMKokkos<DeviceType>::~PairMEAMKokkos()
{
if (!copymode) {
if (copymode) return;
memoryKK->destroy_kokkos(k_eatom,eatom);
memoryKK->destroy_kokkos(k_vatom,vatom);
delete meam_inst_kk;
}
meam_inst = nullptr;
}
/* ---------------------------------------------------------------------- */
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairMEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
{
@ -105,36 +86,32 @@ void PairMEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
}
if (vflag_atom) {
memoryKK->destroy_kokkos(k_vatom,vatom);
memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
d_vatom = k_vatom.view<DeviceType>();
}
atomKK->sync(execution_space,datamask_read);
if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
else atomKK->modified(execution_space,F_MASK);
// neighbor list info
NeighListKokkos<DeviceType>* k_halflist = static_cast<NeighListKokkos<DeviceType>*>(listhalf);
int inum_half = listhalf->inum;
int* numneigh_half = listhalf->numneigh;
int* ilist_half = listhalf->ilist;
NeighListKokkos<DeviceType>* k_halflist = static_cast<NeighListKokkos<DeviceType>*>(listhalf);
d_ilist_half = k_halflist->d_ilist;
d_numneigh_half = k_halflist->d_numneigh;
d_neighbors_half = k_halflist->d_neighbors;
NeighListKokkos<DeviceType>* k_fulllist = static_cast<NeighListKokkos<DeviceType>*>(listfull);
d_numneigh_full = k_fulllist->d_numneigh;
d_neighbors_full = k_fulllist->d_neighbors;
EV_FLOAT ev;
copymode = 1;
meam_inst_kk->copymode = 1;
// strip neighbor lists of any special bond flags before using with MEAM
// necessary before doing neigh_f2c and neigh_c2f conversions each step
if (neighbor->ago == 0) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMEAMKernelNeighStrip >(0,inum_half),*this);
}
if (neighbor->ago == 0)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMEAMNeighStrip >(0,inum_half),*this);
// check size of scrfcn based on half neighbor list
@ -142,152 +119,137 @@ void PairMEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
nall = nlocal + atom->nghost;
int n = 0;
//for (ii = 0; ii < inum_half; ii++) n += numneigh_half[ilist_half[ii]];
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMEAMKernelA>(0,inum_half), *this, n);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMEAMOffsets>(0,inum_half),*this,n);
meam_inst_kk->meam_dens_setup(atom->nmax, nall, n);
//double **x = atom->x;
x = atomKK->k_x.view<DeviceType>();
//double **f = atom->f;
f = atomKK->k_f.view<DeviceType>();
//int *type = atom->type;
type = atomKK->k_type.view<DeviceType>();
atomKK->sync(execution_space,datamask_read);
int ntype = atom->ntypes;
// 3 stages of MEAM calculation
// loop over my atoms followed by communication
int offset = 0;
int errorflag = 0;
#if 0
for (ii = 0; ii < inum_half; ii++) {
i = ilist_half[ii];
meam_inst->meam_dens_init(i,ntype,type,map,x,
numneigh_half[i],firstneigh_half[i],
numneigh_full[i],firstneigh_full[i],
offset);
offset += numneigh_half[i];
}
#endif
// To do: create the cumulative offset array in host and device
k_offset = DAT::tdual_int_1d("pair:offset",inum_half+1);
h_offset = k_offset.h_view;
d_offset = k_offset.template view<DeviceType>();
ArrayTypes<LMPHostType>::t_int_1d h_ilist;
ArrayTypes<LMPHostType>::t_int_1d h_numneigh;
h_ilist = Kokkos::create_mirror_view(k_halflist->d_ilist);
h_numneigh = Kokkos::create_mirror_view(k_halflist->d_numneigh);
Kokkos::deep_copy(h_ilist,k_halflist->d_ilist);
Kokkos::deep_copy(h_numneigh,k_halflist->d_numneigh);
d_offset = typename AT::t_int_1d("pair:offset",inum_half+1);
{
// local variables for lambda capture
h_offset[0] = 0;
for (int ii = 0; ii < inum_half; ii++) {
int i = h_ilist[ii];
h_offset[ii+1] = h_offset[ii] + h_numneigh[i];
auto l_ilist_half = d_ilist_half;
auto l_numneigh_half = d_numneigh_half;
auto l_offset = d_offset;
Kokkos::parallel_scan(inum_half, LAMMPS_LAMBDA(int ii, int &m_fill, bool final) {
int i = l_ilist_half[ii];
m_fill += l_numneigh_half[i];
if (final)
l_offset[ii+1] = m_fill;
});
}
k_offset.template modify<LMPHostType>();
k_offset.template sync<DeviceType>();
meam_inst_kk->meam_dens_init(inum_half,ntype,type,d_map,x,d_numneigh_half,d_numneigh_full,&offset,d_ilist_half,d_neighbors_half, d_neighbors_full, d_offset, neighflag);
int need_dup = lmp->kokkos->need_dup<DeviceType>();
meam_inst_kk->meam_dens_init(inum_half,ntype,type,d_map,x,d_numneigh_half,d_numneigh_full,d_ilist_half,d_neighbors_half, d_neighbors_full, d_offset, neighflag, need_dup);
meam_inst_kk->k_rho0.template modify<DeviceType>();
meam_inst_kk->k_rho0.template sync<LMPHostType>();
meam_inst_kk->k_arho2b.template modify<DeviceType>();
meam_inst_kk->k_arho2b.template sync<LMPHostType>();
meam_inst_kk->k_arho1.template modify<DeviceType>();
meam_inst_kk->k_arho1.template sync<LMPHostType>();
meam_inst_kk->k_arho2.template modify<DeviceType>();
meam_inst_kk->k_arho2.template sync<LMPHostType>();
meam_inst_kk->k_arho3.template modify<DeviceType>();
meam_inst_kk->k_arho3.template sync<LMPHostType>();
meam_inst_kk->k_arho3b.template modify<DeviceType>();
meam_inst_kk->k_arho3b.template sync<LMPHostType>();
meam_inst_kk->k_t_ave.template modify<DeviceType>();
meam_inst_kk->k_t_ave.template sync<LMPHostType>();
meam_inst_kk->k_tsq_ave.template modify<DeviceType>();
meam_inst_kk->k_tsq_ave.template sync<LMPHostType>();
comm->reverse_comm(this);
meam_inst_kk->k_rho0.template modify<LMPHostType>();
meam_inst_kk->k_rho0.template sync<DeviceType>();
meam_inst_kk->k_arho2b.template modify<LMPHostType>();
meam_inst_kk->k_arho2b.template sync<DeviceType>();
meam_inst_kk->k_arho1.template modify<LMPHostType>();
meam_inst_kk->k_arho1.template sync<DeviceType>();
meam_inst_kk->k_arho2.template modify<LMPHostType>();
meam_inst_kk->k_arho2.template sync<DeviceType>();
meam_inst_kk->k_arho3.template modify<LMPHostType>();
meam_inst_kk->k_arho3.template sync<DeviceType>();
meam_inst_kk->k_arho3b.template modify<LMPHostType>();
meam_inst_kk->k_arho3b.template sync<DeviceType>();
meam_inst_kk->k_t_ave.template modify<LMPHostType>();
meam_inst_kk->k_t_ave.template sync<DeviceType>();
meam_inst_kk->k_tsq_ave.template modify<LMPHostType>();
meam_inst_kk->k_tsq_ave.template sync<DeviceType>();
meam_inst_kk->meam_dens_final(nlocal,eflag_either,eflag_global,eflag_atom,
&eng_vdwl,d_eatom,ntype,type,d_map,errorflag);
if (errorflag) {
char str[128];
sprintf(str,"MEAM library error %d",errorflag);
error->one(FLERR,str);
}
d_eatom,ntype,type,d_map,d_scale,errorflag,ev);
if (errorflag)
error->one(FLERR,"MEAM library error {}",errorflag);
meam_inst_kk->k_rho0.template modify<DeviceType>();
meam_inst_kk->k_rho1.template modify<DeviceType>();
meam_inst_kk->k_rho2.template modify<DeviceType>();
meam_inst_kk->k_rho3.template modify<DeviceType>();
meam_inst_kk->k_frhop.template modify<DeviceType>();
meam_inst_kk->k_gamma.template modify<DeviceType>();
meam_inst_kk->k_dgamma1.template modify<DeviceType>();
meam_inst_kk->k_dgamma2.template modify<DeviceType>();
meam_inst_kk->k_dgamma3.template modify<DeviceType>();
meam_inst_kk->k_arho2b.template modify<DeviceType>();
meam_inst_kk->k_arho1.template modify<DeviceType>();
meam_inst_kk->k_arho2.template modify<DeviceType>();
meam_inst_kk->k_arho3.template modify<DeviceType>();
meam_inst_kk->k_arho3b.template modify<DeviceType>();
meam_inst_kk->k_t_ave.template modify<DeviceType>();
meam_inst_kk->k_tsq_ave.template modify<DeviceType>();
comm->forward_comm(this);
offset = 0;
meam_inst_kk->k_rho0.template sync<DeviceType>();
meam_inst_kk->k_rho1.template sync<DeviceType>();
meam_inst_kk->k_rho2.template sync<DeviceType>();
meam_inst_kk->k_rho3.template sync<DeviceType>();
meam_inst_kk->k_frhop.template sync<DeviceType>();
meam_inst_kk->k_gamma.template sync<DeviceType>();
meam_inst_kk->k_dgamma1.template sync<DeviceType>();
meam_inst_kk->k_dgamma2.template sync<DeviceType>();
meam_inst_kk->k_dgamma3.template sync<DeviceType>();
meam_inst_kk->k_arho2b.template sync<DeviceType>();
meam_inst_kk->k_arho1.template sync<DeviceType>();
meam_inst_kk->k_arho2.template sync<DeviceType>();
meam_inst_kk->k_arho3.template sync<DeviceType>();
meam_inst_kk->k_arho3b.template sync<DeviceType>();
meam_inst_kk->k_t_ave.template sync<DeviceType>();
meam_inst_kk->k_tsq_ave.template sync<DeviceType>();
// vptr is first value in vatom if it will be used by meam_force()
// else vatom may not exist, so pass dummy ptr
meam_inst_kk->meam_force(inum_half,eflag_global,eflag_atom,vflag_global,
vflag_atom,d_eatom,ntype,type,d_map,x,
d_numneigh_half, d_numneigh_full,f,d_vatom,
d_ilist_half, d_offset, d_neighbors_half, d_neighbors_full,
neighflag, need_dup, ev);
#if 0 // To do: is this correct? vflag_atom is used to access vatom
typename ArrayTypes<DeviceType>::t_virial_array vptr;
if (vflag_atom) vptr = d_vatom;
else vptr = NULL;
for (ii = 0; ii < inum_half; ii++) {
i = ilist_half[ii];
meam_inst->meam_force(i,eflag_either,eflag_global,eflag_atom,
vflag_atom,&eng_vdwl,eatom,ntype,type,map,x,
numneigh_half[i],firstneigh_half[i],
numneigh_full[i],firstneigh_full[i],
offset,f,vptr);
offset += numneigh_half[i];
if (eflag_global) eng_vdwl += ev.evdwl;
if (vflag_global) {
virial[0] += ev.v[0];
virial[1] += ev.v[1];
virial[2] += ev.v[2];
virial[3] += ev.v[3];
virial[4] += ev.v[4];
virial[5] += ev.v[5];
}
#endif
meam_inst_kk->meam_force(inum_half, eflag_either,eflag_global,eflag_atom,
vflag_atom,&eng_vdwl,d_eatom,ntype,type,d_map,x,
d_numneigh_half, d_numneigh_full,f,d_vatom,d_ilist_half, d_offset, d_neighbors_half, d_neighbors_full, neighflag);
if (vflag_fdotr) pair_virial_fdotr_compute(this);
if (eflag_atom) {
k_eatom.template modify<DeviceType>();
k_eatom.template sync<LMPHostType>();
k_eatom.sync_host();
}
if (vflag_atom) {
k_vatom.template modify<DeviceType>();
k_vatom.template sync<LMPHostType>();
k_vatom.sync_host();
}
if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
else atomKK->modified(execution_space,F_MASK);
copymode = 0;
meam_inst_kk->copymode = 0;
}
/* ----------------------------------------------------------------------
@ -298,24 +260,22 @@ void PairMEAMKokkos<DeviceType>::coeff(int narg, char **arg)
{
PairMEAM::coeff(narg,arg);
//sync map
// sync map and scale
int n = atom->ntypes;
k_map = DAT::tdual_int_1d("pair:map",n+1);
HAT::t_int_1d h_map = k_map.h_view;
MemKK::realloc_kokkos(d_map,"pair:map",n+1);
MemKK::realloc_kokkos(d_scale,"pair:scale",n+1,n+1);
auto h_map = Kokkos::create_mirror_view(d_map);
auto h_scale = Kokkos::create_mirror_view(d_scale);
for (int i = 1; i <= n; i++)
for (int i = 1; i <= n; i++) {
h_map[i] = map[i];
for (int j = 1; j <= n; j++)
h_scale(i,j) = scale[i][j];
}
k_map.template modify<LMPHostType>();
k_map.template sync<DeviceType>();
d_map = k_map.template view<DeviceType>();
// To do: need to synchronize phirar variables
meam_inst_kk->meam_setup_done();
Kokkos::deep_copy(d_map,h_map);
Kokkos::deep_copy(d_scale,h_scale);
}
/* ----------------------------------------------------------------------
@ -324,24 +284,27 @@ void PairMEAMKokkos<DeviceType>::coeff(int narg, char **arg)
template<class DeviceType>
void PairMEAMKokkos<DeviceType>::init_style()
{
PairMEAM::init_style();
// adjust neighbor list request for KOKKOS
neighflag = lmp->kokkos->neighflag;
auto request = neighbor->find_request(this);
// MEAM needs both a full and half neighbor list? Not sure how to get that.
if (!(neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD))
error->all(FLERR, "Cannot use chosen neighbor list style with pair meam/kk");
auto request = neighbor->find_request(this,1);
request->set_kokkos_host(std::is_same<DeviceType,LMPHostType>::value &&
!std::is_same<DeviceType,LMPDeviceType>::value);
request->set_kokkos_device(std::is_same<DeviceType,LMPDeviceType>::value);
if (neighflag == FULL) request->enable_full();
request = neighbor->find_request(this,2);
request->set_kokkos_host(std::is_same<DeviceType,LMPHostType>::value &&
!std::is_same<DeviceType,LMPDeviceType>::value);
request->set_kokkos_device(std::is_same<DeviceType,LMPDeviceType>::value);
if (neighflag == FULL)
error->all(FLERR,"Must use half neighbor list style with pair meam/kk");
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int PairMEAMKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf,
int pbc_flag, int *pbc)
@ -350,9 +313,11 @@ int PairMEAMKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2
iswap = iswap_in;
v_buf = buf.view<DeviceType>();
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMEAMPackForwardComm>(0,n),*this);
return n;
return n*38;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairMEAMKokkos<DeviceType>::operator()(TagPairMEAMPackForwardComm, const int &i) const {
@ -390,6 +355,7 @@ void PairMEAMKokkos<DeviceType>::operator()(TagPairMEAMPackForwardComm, const in
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairMEAMKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf)
{
@ -398,6 +364,8 @@ void PairMEAMKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in,
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMEAMUnpackForwardComm>(0,n),*this);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairMEAMKokkos<DeviceType>::operator()(TagPairMEAMUnpackForwardComm, const int &i) const{
@ -434,16 +402,32 @@ void PairMEAMKokkos<DeviceType>::operator()(TagPairMEAMUnpackForwardComm, const
meam_inst_kk->d_tsq_ave(i+first,2) = v_buf[m++];
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int PairMEAMKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf,
int pbc_flag, int *pbc)
{
int i,j,k,m;
meam_inst_kk->k_rho0.sync_host();
meam_inst_kk->k_rho1.sync_host();
meam_inst_kk->k_rho2.sync_host();
meam_inst_kk->k_rho3.sync_host();
meam_inst_kk->k_frhop.sync_host();
meam_inst_kk->k_gamma.sync_host();
meam_inst_kk->k_dgamma1.sync_host();
meam_inst_kk->k_dgamma2.sync_host();
meam_inst_kk->k_dgamma3.sync_host();
meam_inst_kk->k_arho2b.sync_host();
meam_inst_kk->k_arho1.sync_host();
meam_inst_kk->k_arho2.sync_host();
meam_inst_kk->k_arho3.sync_host();
meam_inst_kk->k_arho3b.sync_host();
meam_inst_kk->k_t_ave.sync_host();
meam_inst_kk->k_tsq_ave.sync_host();
m = 0;
for (i = 0; i < n; i++) {
j = list[i];
int m = 0;
for (int i = 0; i < n; i++) {
const int j = list[i];
buf[m++] = meam_inst_kk->h_rho0[j];
buf[m++] = meam_inst_kk->h_rho1[j];
buf[m++] = meam_inst_kk->h_rho2[j];
@ -463,7 +447,7 @@ int PairMEAMKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf,
buf[m++] = meam_inst_kk->h_arho2(j,3);
buf[m++] = meam_inst_kk->h_arho2(j,4);
buf[m++] = meam_inst_kk->h_arho2(j,5);
for (k = 0; k < 10; k++) buf[m++] = meam_inst_kk->h_arho3(j,k);
for (int k = 0; k < 10; k++) buf[m++] = meam_inst_kk->h_arho3(j,k);
buf[m++] = meam_inst_kk->h_arho3b(j,0);
buf[m++] = meam_inst_kk->h_arho3b(j,1);
buf[m++] = meam_inst_kk->h_arho3b(j,2);
@ -478,14 +462,31 @@ int PairMEAMKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf,
return m;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairMEAMKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *buf)
{
int i,k,m,last;
meam_inst_kk->k_rho0.sync_host();
meam_inst_kk->k_rho1.sync_host();
meam_inst_kk->k_rho2.sync_host();
meam_inst_kk->k_rho3.sync_host();
meam_inst_kk->k_frhop.sync_host();
meam_inst_kk->k_gamma.sync_host();
meam_inst_kk->k_dgamma1.sync_host();
meam_inst_kk->k_dgamma2.sync_host();
meam_inst_kk->k_dgamma3.sync_host();
meam_inst_kk->k_arho2b.sync_host();
meam_inst_kk->k_arho1.sync_host();
meam_inst_kk->k_arho2.sync_host();
meam_inst_kk->k_arho3.sync_host();
meam_inst_kk->k_arho3b.sync_host();
meam_inst_kk->k_t_ave.sync_host();
meam_inst_kk->k_tsq_ave.sync_host();
m = 0;
last = first + n;
for (i = first; i < last; i++) {
int m = 0;
const int last = first + n;
for (int i = first; i < last; i++) {
meam_inst_kk->h_rho0[i] = buf[m++];
meam_inst_kk->h_rho1[i] = buf[m++];
meam_inst_kk->h_rho2[i] = buf[m++];
@ -505,7 +506,7 @@ void PairMEAMKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *b
meam_inst_kk->h_arho2(i,3) = buf[m++];
meam_inst_kk->h_arho2(i,4) = buf[m++];
meam_inst_kk->h_arho2(i,5) = buf[m++];
for (k = 0; k < 10; k++) meam_inst_kk->h_arho3(i,k) = buf[m++];
for (int k = 0; k < 10; k++) meam_inst_kk->h_arho3(i,k) = buf[m++];
meam_inst_kk->h_arho3b(i,0) = buf[m++];
meam_inst_kk->h_arho3b(i,1) = buf[m++];
meam_inst_kk->h_arho3b(i,2) = buf[m++];
@ -516,17 +517,83 @@ void PairMEAMKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *b
meam_inst_kk->h_tsq_ave(i,1) = buf[m++];
meam_inst_kk->h_tsq_ave(i,2) = buf[m++];
}
meam_inst_kk->k_rho0.modify_host();
meam_inst_kk->k_rho1.modify_host();
meam_inst_kk->k_rho2.modify_host();
meam_inst_kk->k_rho3.modify_host();
meam_inst_kk->k_frhop.modify_host();
meam_inst_kk->k_gamma.modify_host();
meam_inst_kk->k_dgamma1.modify_host();
meam_inst_kk->k_dgamma2.modify_host();
meam_inst_kk->k_dgamma3.modify_host();
meam_inst_kk->k_arho2b.modify_host();
meam_inst_kk->k_arho1.modify_host();
meam_inst_kk->k_arho2.modify_host();
meam_inst_kk->k_arho3.modify_host();
meam_inst_kk->k_arho3b.modify_host();
meam_inst_kk->k_t_ave.modify_host();
meam_inst_kk->k_tsq_ave.modify_host();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int PairMEAMKokkos<DeviceType>::pack_reverse_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf)
{
first = first_in;
v_buf = buf.view<DeviceType>();
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMEAMPackReverseComm>(0,n),*this);
return n*30;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairMEAMKokkos<DeviceType>::operator()(TagPairMEAMPackReverseComm, const int &i) const {
int m = i*30;
v_buf[m++] = meam_inst_kk->d_rho0[i+first];
v_buf[m++] = meam_inst_kk->d_arho2b[i+first];
v_buf[m++] = meam_inst_kk->d_arho1(i+first,0);
v_buf[m++] = meam_inst_kk->d_arho1(i+first,1);
v_buf[m++] = meam_inst_kk->d_arho1(i+first,2);
v_buf[m++] = meam_inst_kk->d_arho2(i+first,0);
v_buf[m++] = meam_inst_kk->d_arho2(i+first,1);
v_buf[m++] = meam_inst_kk->d_arho2(i+first,2);
v_buf[m++] = meam_inst_kk->d_arho2(i+first,3);
v_buf[m++] = meam_inst_kk->d_arho2(i+first,4);
v_buf[m++] = meam_inst_kk->d_arho2(i+first,5);
for (int k = 0; k < 10; k++) v_buf[m++] = meam_inst_kk->d_arho3(i+first,k);
v_buf[m++] = meam_inst_kk->d_arho3b(i+first,0);
v_buf[m++] = meam_inst_kk->d_arho3b(i+first,1);
v_buf[m++] = meam_inst_kk->d_arho3b(i+first,2);
v_buf[m++] = meam_inst_kk->d_t_ave(i+first,0);
v_buf[m++] = meam_inst_kk->d_t_ave(i+first,1);
v_buf[m++] = meam_inst_kk->d_t_ave(i+first,2);
v_buf[m++] = meam_inst_kk->d_tsq_ave(i+first,0);
v_buf[m++] = meam_inst_kk->d_tsq_ave(i+first,1);
v_buf[m++] = meam_inst_kk->d_tsq_ave(i+first,2);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int PairMEAMKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
{
int i,k,m,last;
meam_inst_kk->k_rho0.sync_host();
meam_inst_kk->k_arho2b.sync_host();
meam_inst_kk->k_arho1.sync_host();
meam_inst_kk->k_arho2.sync_host();
meam_inst_kk->k_arho3.sync_host();
meam_inst_kk->k_arho3b.sync_host();
meam_inst_kk->k_t_ave.sync_host();
meam_inst_kk->k_tsq_ave.sync_host();
m = 0;
last = first + n;
for (i = first; i < last; i++) {
int m = 0;
const int last = first + n;
for (int i = first; i < last; i++) {
buf[m++] = meam_inst_kk->h_rho0[i];
buf[m++] = meam_inst_kk->h_arho2b[i];
buf[m++] = meam_inst_kk->h_arho1(i,0);
@ -538,7 +605,7 @@ int PairMEAMKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
buf[m++] = meam_inst_kk->h_arho2(i,3);
buf[m++] = meam_inst_kk->h_arho2(i,4);
buf[m++] = meam_inst_kk->h_arho2(i,5);
for (k = 0; k < 10; k++) buf[m++] = meam_inst_kk->h_arho3(i,k);
for (int k = 0; k < 10; k++) buf[m++] = meam_inst_kk->h_arho3(i,k);
buf[m++] = meam_inst_kk->h_arho3b(i,0);
buf[m++] = meam_inst_kk->h_arho3b(i,1);
buf[m++] = meam_inst_kk->h_arho3b(i,2);
@ -554,14 +621,64 @@ int PairMEAMKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairMEAMKokkos<DeviceType>::unpack_reverse_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf)
{
d_sendlist = k_sendlist.view<DeviceType>();
iswap = iswap_in;
v_buf = buf.view<DeviceType>();
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMEAMUnpackReverseComm>(0,n),*this);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairMEAMKokkos<DeviceType>::operator()(TagPairMEAMUnpackReverseComm, const int &i) const {
int j = d_sendlist(iswap, i);
int m = i*30;
meam_inst_kk->d_rho0[j] += v_buf[m++];
meam_inst_kk->d_arho2b[j] += v_buf[m++];
meam_inst_kk->d_arho1(j,0) += v_buf[m++];
meam_inst_kk->d_arho1(j,1) += v_buf[m++];
meam_inst_kk->d_arho1(j,2) += v_buf[m++];
meam_inst_kk->d_arho2(j,0) += v_buf[m++];
meam_inst_kk->d_arho2(j,1) += v_buf[m++];
meam_inst_kk->d_arho2(j,2) += v_buf[m++];
meam_inst_kk->d_arho2(j,3) += v_buf[m++];
meam_inst_kk->d_arho2(j,4) += v_buf[m++];
meam_inst_kk->d_arho2(j,5) += v_buf[m++];
for (int k = 0; k < 10; k++) meam_inst_kk->d_arho3(j,k) += v_buf[m++];
meam_inst_kk->d_arho3b(j,0) += v_buf[m++];
meam_inst_kk->d_arho3b(j,1) += v_buf[m++];
meam_inst_kk->d_arho3b(j,2) += v_buf[m++];
meam_inst_kk->d_t_ave(j,0) += v_buf[m++];
meam_inst_kk->d_t_ave(j,1) += v_buf[m++];
meam_inst_kk->d_t_ave(j,2) += v_buf[m++];
meam_inst_kk->d_tsq_ave(j,0) += v_buf[m++];
meam_inst_kk->d_tsq_ave(j,1) += v_buf[m++];
meam_inst_kk->d_tsq_ave(j,2) += v_buf[m++];
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairMEAMKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *buf)
{
int i,j,k,m;
meam_inst_kk->k_rho0.sync_host();
meam_inst_kk->k_arho2b.sync_host();
meam_inst_kk->k_arho1.sync_host();
meam_inst_kk->k_arho2.sync_host();
meam_inst_kk->k_arho3.sync_host();
meam_inst_kk->k_arho3b.sync_host();
meam_inst_kk->k_t_ave.sync_host();
meam_inst_kk->k_tsq_ave.sync_host();
m = 0;
for (i = 0; i < n; i++) {
j = list[i];
int m = 0;
for (int i = 0; i < n; i++) {
const int j = list[i];
meam_inst_kk->h_rho0[j] += buf[m++];
meam_inst_kk->h_arho2b[j] += buf[m++];
meam_inst_kk->h_arho1(j,0) += buf[m++];
@ -573,7 +690,7 @@ void PairMEAMKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *b
meam_inst_kk->h_arho2(j,3) += buf[m++];
meam_inst_kk->h_arho2(j,4) += buf[m++];
meam_inst_kk->h_arho2(j,5) += buf[m++];
for (k = 0; k < 10; k++) meam_inst_kk->h_arho3(j,k) += buf[m++];
for (int k = 0; k < 10; k++) meam_inst_kk->h_arho3(j,k) += buf[m++];
meam_inst_kk->h_arho3b(j,0) += buf[m++];
meam_inst_kk->h_arho3b(j,1) += buf[m++];
meam_inst_kk->h_arho3b(j,2) += buf[m++];
@ -584,46 +701,48 @@ void PairMEAMKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *b
meam_inst_kk->h_tsq_ave(j,1) += buf[m++];
meam_inst_kk->h_tsq_ave(j,2) += buf[m++];
}
meam_inst_kk->k_rho0.modify_host();
meam_inst_kk->k_arho2b.modify_host();
meam_inst_kk->k_arho1.modify_host();
meam_inst_kk->k_arho2.modify_host();
meam_inst_kk->k_arho3.modify_host();
meam_inst_kk->k_arho3b.modify_host();
meam_inst_kk->k_t_ave.modify_host();
meam_inst_kk->k_tsq_ave.modify_host();
}
/* ----------------------------------------------------------------------
memory usage of local atom-based arrays
strip special bond flags from neighbor list entries
are not used with MEAM
need to do here so Fortran lib doesn't see them
done once per reneighbor so that neigh_f2c and neigh_c2f don't see them
------------------------------------------------------------------------- */
template<class DeviceType>
double PairMEAMKokkos<DeviceType>::memory_usage()
{
double bytes = 11 * meam_inst_kk->nmax * sizeof(double);
bytes += (3 + 6 + 10 + 3 + 3 + 3) * meam_inst_kk->nmax * sizeof(double);
bytes += 3 * meam_inst_kk->maxneigh * sizeof(double);
return bytes;
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairMEAMKokkos<DeviceType>::operator()(TagPairMEAMKernelNeighStrip, const int &ii) const {
/* ----------------------------------------------------------------------
* strip special bond flags from neighbor list entries
* are not used with MEAM
* need to do here so Fortran lib doesn't see them
* done once per reneighbor so that neigh_f2c and neigh_c2f don't see them
* ------------------------------------------------------------------------- */
void PairMEAMKokkos<DeviceType>::operator()(TagPairMEAMNeighStrip, const int &ii) const {
const int i = d_ilist_half[ii];
const int jnum_half = d_numneigh_half[i];
const int jnum_full = d_numneigh_full[i];
for (int jj = 0; jj < jnum_half; jj++) {
for (int jj = 0; jj < jnum_half; jj++)
d_neighbors_half(i,jj) &= NEIGHMASK;
}
for (int jj = 0; jj < jnum_full; jj++) {
for (int jj = 0; jj < jnum_full; jj++)
d_neighbors_full(i,jj) &= NEIGHMASK;
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairMEAMKokkos<DeviceType>::operator()(TagPairMEAMKernelA, const int ii, int &n) const {
void PairMEAMKokkos<DeviceType>::operator()(TagPairMEAMOffsets, const int ii, int &n) const {
const int i = d_ilist_half[ii];
n += d_numneigh_half[i];
}
/* ---------------------------------------------------------------------- */
namespace LAMMPS_NS {
template class PairMEAMKokkos<LMPDeviceType>;
#ifdef KOKKOS_ENABLE_CUDA

View File

@ -1,6 +1,6 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
https://www.lammps.org/, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
@ -12,31 +12,33 @@
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
// clang-format off
PairStyle(meam/c/kk,PairMEAMKokkos<LMPDeviceType>)
PairStyle(meam/c/kk/device,PairMEAMKokkos<LMPDeviceType>)
PairStyle(meam/c/kk/host,PairMEAMKokkos<LMPHostType>)
PairStyle(meam/kk,PairMEAMKokkos<LMPDeviceType>)
PairStyle(meam/kk/device,PairMEAMKokkos<LMPDeviceType>)
PairStyle(meam/kk/host,PairMEAMKokkos<LMPHostType>)
// clang-format on
#else
#ifndef LMP_PAIR_MEAMC_KOKKOS_H
#define LMP_PAIR_MEAMC_KOKKOS_H
// clang-format off
#ifndef LMP_PAIR_MEAM_KOKKOS_H
#define LMP_PAIR_MEAM_KOKKOS_H
#include "kokkos_base.h"
#include "pair_kokkos.h"
#include "pair_meam.h"
#include "neigh_list_kokkos.h"
#include "meam_kokkos.h"
namespace LAMMPS_NS {
struct TagPairMEAMKernelNeighStrip{};
struct TagPairMEAMKernelA{};
struct TagPairMEAMNeighStrip{};
struct TagPairMEAMOffsets{};
struct TagPairMEAMPackForwardComm{};
struct TagPairMEAMUnpackForwardComm{};
struct TagPairMEAMPackReverseComm{};
struct TagPairMEAMUnpackReverseComm{};
template<class DeviceType>
class MEAMKokkos;
@ -48,13 +50,13 @@ class PairMEAMKokkos : public PairMEAM, public KokkosBase {
enum {COUL_FLAG=0};
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
//typedef EV_FLOAT value_type;
typedef int value_type;
PairMEAMKokkos(class LAMMPS *);
virtual ~PairMEAMKokkos();
void compute(int, int);
void coeff(int, char **);
void init_style();
~PairMEAMKokkos() override;
void compute(int, int) override;
void coeff(int, char **) override;
void init_style() override;
KOKKOS_INLINE_FUNCTION
void operator()(TagPairMEAMPackForwardComm, const int&) const;
@ -63,104 +65,59 @@ class PairMEAMKokkos : public PairMEAM, public KokkosBase {
void operator()(TagPairMEAMUnpackForwardComm, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPairMEAMKernelNeighStrip, const int&) const;
void operator()(TagPairMEAMPackReverseComm, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPairMEAMKernelA, const int, int&) const;
void operator()(TagPairMEAMUnpackReverseComm, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPairMEAMNeighStrip, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPairMEAMOffsets, const int, int&) const;
int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&,
int, int *);
void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&);
int pack_forward_comm(int, int *, double *, int, int *);
void unpack_forward_comm(int, int, double *);
int pack_reverse_comm(int, int, double *);
void unpack_reverse_comm(int, int *, double *);
double memory_usage();
int, int *) override;
int pack_forward_comm(int, int *, double *, int, int *) override;
void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&) override;
void unpack_forward_comm(int, int, double *) override;
int pack_reverse_comm_kokkos(int, int, DAT::tdual_xfloat_1d&) override;
int pack_reverse_comm(int, int, double *) override;
void unpack_reverse_comm_kokkos(int, DAT::tdual_int_2d,
int, DAT::tdual_xfloat_1d&) override;
void unpack_reverse_comm(int, int *, double *) override;
protected:
class MEAMKokkos<DeviceType> *meam_inst_kk;
typename AT::t_x_array_randomread x;
typename AT::t_x_array x;
typename AT::t_f_array f;
typename AT::t_int_1d_randomread type;
typename AT::t_int_1d type;
DAT::tdual_efloat_1d k_eatom;
DAT::tdual_virial_array k_vatom;
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
typename AT::t_efloat_1d d_eatom;
typename AT::t_virial_array d_vatom;
DAT::tdual_int_1d k_offset;
HAT::t_int_1d h_offset;
typename AT::t_int_1d_randomread d_offset;
typename AT::t_int_1d d_offset;
DAT::tdual_int_1d k_map;
typename AT::t_int_1d_randomread d_map;
typename AT::t_int_1d_randomread d_ilist_half;
typename AT::t_int_1d_randomread d_numneigh_half;
typename AT::t_int_1d d_map;
typename AT::t_int_2d d_scale;
typename AT::t_int_1d d_ilist_half;
typename AT::t_int_1d d_numneigh_half;
typename AT::t_neighbors_2d d_neighbors_half;
typename AT::t_int_1d_randomread d_numneigh_full;
typename AT::t_int_1d d_numneigh_full;
typename AT::t_neighbors_2d d_neighbors_full;
typename AT::t_int_2d d_sendlist;
typename AT::t_xfloat_1d_um v_buf;
int iswap,first;
int neighflag,nlocal,nall,eflag,vflag;
int iswap;
int first;
friend void pair_virial_fdotr_compute<PairMEAMKokkos>(PairMEAMKokkos*);
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: MEAM library error %d
A call to the MEAM Fortran library returned an error.
E: Illegal ... command
Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: Incorrect args for pair coefficients
Self-explanatory. Check the input script or data file.
E: Pair style MEAM requires newton pair on
See the newton command. This is a restriction to use the MEAM
potential.
E: Cannot open MEAM potential file %s
The specified MEAM potential file cannot be opened. Check that the
path and name are correct.
E: Incorrect format in MEAM potential file
Incorrect number of words per line in the potential file.
E: Unrecognized lattice type in MEAM file 1
The lattice type in an entry of the MEAM library file is not
valid.
E: Did not find all elements in MEAM library file
The requested elements were not found in the MEAM file.
E: Keyword %s in MEAM parameter file not recognized
Self-explanatory.
E: Unrecognized lattice type in MEAM file 2
The lattice type in an entry of the MEAM parameter file is not
valid.
*/

View File

@ -27,7 +27,9 @@ typedef enum { FCC, BCC, HCP, DIM, DIA, DIA3, B1, C11, L12, B2, CH4, LIN, ZIG, T
class MEAM {
public:
MEAM(Memory *mem);
~MEAM();
virtual ~MEAM();
int copymode;
protected:
Memory *memory;
@ -285,8 +287,8 @@ class MEAM {
double *rozero, int *ibar);
void meam_setup_param(int which, double value, int nindex, int *index /*index(3)*/,
int *errorflag);
void meam_setup_done(double *cutmax);
void meam_dens_setup(int atom_nmax, int nall, int n_neigh);
virtual void meam_setup_done(double *cutmax);
virtual void meam_dens_setup(int atom_nmax, int nall, int n_neigh);
void meam_dens_init(int i, int ntype, int *type, int *fmap, double **x, int numneigh,
int *firstneigh, int numneigh_full, int *firstneigh_full, int fnoffset);
void meam_dens_final(int nlocal, int eflag_either, int eflag_global, int eflag_atom,

View File

@ -36,6 +36,7 @@ MEAM::MEAM(Memory* mem)
maxneigh = 0;
scrfcn = dscrfcn = fcpair = nullptr;
copymode = 0;
neltypes = 0;
for (int i = 0; i < maxelt; i++) {
@ -53,6 +54,8 @@ MEAM::MEAM(Memory* mem)
MEAM::~MEAM()
{
if (copymode) return;
memory->destroy(this->phirar6);
memory->destroy(this->phirar5);
memory->destroy(this->phirar4);

View File

@ -75,6 +75,7 @@ PairMEAM::~PairMEAM()
{
if (copymode) return;
if (meam_inst)
delete meam_inst;
if (allocated) {

View File

@ -52,7 +52,7 @@ class PairMEAM : public Pair {
double **scale; // scaling factor for adapt
virtual void allocate();
void allocate();
void read_files(const std::string &, const std::string &);
void read_global_meam_file(const std::string &);
void read_user_meam_file(const std::string &);

View File

@ -126,6 +126,7 @@ Pair::Pair(LAMMPS *lmp) : Pointers(lmp)
datamask_modify = ALL_MASK;
kokkosable = 0;
reverse_comm_device = 0;
copymode = 0;
}

View File

@ -123,6 +123,7 @@ class Pair : protected Pointers {
ExecutionSpace execution_space;
unsigned int datamask_read, datamask_modify;
int kokkosable; // 1 if Kokkos pair
int reverse_comm_device; // 1 if reverse comm on Device
Pair(class LAMMPS *);
~Pair() override;