Merge pull request #3328 from stanmoore1/kk_meam_release
Add Kokkos version of pair MEAM
This commit is contained in:
@ -194,7 +194,7 @@ OPT.
|
||||
* :doc:`lubricateU/poly <pair_lubricateU>`
|
||||
* :doc:`mdpd <pair_mesodpd>`
|
||||
* :doc:`mdpd/rhosum <pair_mesodpd>`
|
||||
* :doc:`meam <pair_meam>`
|
||||
* :doc:`meam (k) <pair_meam>`
|
||||
* :doc:`meam/spline (o) <pair_meam_spline>`
|
||||
* :doc:`meam/sw/spline <pair_meam_sw_spline>`
|
||||
* :doc:`mesocnt <pair_mesocnt>`
|
||||
|
||||
@ -71,7 +71,7 @@ Syntax
|
||||
*no_affinity* values = none
|
||||
*kokkos* args = keyword value ...
|
||||
zero or more keyword/value pairs may be appended
|
||||
keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* *comm/pair/forward* *comm/fix/forward* or *comm/reverse* or *gpu/aware* or *pair/only*
|
||||
keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* *comm/pair/forward* *comm/fix/forward* or *comm/reverse* or *comm/pair/reverse* or *gpu/aware* or *pair/only*
|
||||
*neigh* value = *full* or *half*
|
||||
full = full neighbor list
|
||||
half = half neighbor list built in thread-safe manner
|
||||
@ -96,6 +96,7 @@ Syntax
|
||||
*comm/pair/forward* value = *no* or *device*
|
||||
*comm/fix/forward* value = *no* or *device*
|
||||
*comm/reverse* value = *no* or *host* or *device*
|
||||
*comm/pair/reverse* value = *no* or *device*
|
||||
no = perform communication pack/unpack in non-KOKKOS mode
|
||||
host = perform pack/unpack on host (e.g. with OpenMP threading)
|
||||
device = perform pack/unpack on device (e.g. on GPU)
|
||||
@ -500,7 +501,7 @@ rule of thumb may give too large a binsize and the default should be
|
||||
overridden with a smaller value.
|
||||
|
||||
The *comm* and *comm/exchange* and *comm/forward* and *comm/pair/forward*
|
||||
and *comm/fix/forward* and comm/reverse*
|
||||
and *comm/fix/forward* and *comm/reverse* and *comm/pair/reverse*
|
||||
keywords determine whether the host or device performs the packing and
|
||||
unpacking of data when communicating per-atom data between processors.
|
||||
"Exchange" communication happens only on timesteps that neighbor lists
|
||||
@ -521,9 +522,16 @@ packing/unpacking data for the communication. A value of *host* means to
|
||||
use the host, typically a multi-core CPU, and perform the
|
||||
packing/unpacking in parallel with threads. A value of *device* means to
|
||||
use the device, typically a GPU, to perform the packing/unpacking
|
||||
operation. If a value of *host* is used for the *comm/pair/forward* or
|
||||
*comm/fix/forward* keyword, it will be automatically be changed to *no*
|
||||
since these keywords don't support *host* mode.
|
||||
operation.
|
||||
|
||||
For the *comm/pair/forward* or *comm/fix/forward* or *comm/pair/reverse*
|
||||
keywords, if a value of *host* is used it will be automatically
|
||||
be changed to *no* since these keywords don't support *host* mode. The
|
||||
value of *no* will also always be used when running on the CPU, i.e. setting
|
||||
the value to *device* will have no effect if the pair/fix style is
|
||||
running on the CPU. For the *comm/fix/forward* or *comm/pair/reverse*
|
||||
keywords, not all styles support *device* mode and in that case will run
|
||||
in *no* mode instead.
|
||||
|
||||
The optimal choice for these keywords depends on the input script and
|
||||
the hardware used. The *no* value is useful for verifying that the
|
||||
|
||||
@ -1,8 +1,11 @@
|
||||
.. index:: pair_style meam
|
||||
.. index:: pair_style meam/kk
|
||||
|
||||
pair_style meam command
|
||||
=========================
|
||||
|
||||
Accelerator Variants: *meam/kk*
|
||||
|
||||
Syntax
|
||||
""""""
|
||||
|
||||
@ -347,6 +350,12 @@ Most published MEAM parameter sets use the default values *attrac* = *repulse* =
|
||||
Setting *repuls* = *attrac* = *delta* corresponds to the form used in several
|
||||
recent published MEAM parameter sets, such as :ref:`(Valone) <Valone>`
|
||||
|
||||
----------
|
||||
|
||||
.. include:: accel_styles.rst
|
||||
|
||||
----------
|
||||
|
||||
.. note::
|
||||
|
||||
The default form of the *erose* expression in LAMMPS was corrected
|
||||
|
||||
@ -127,6 +127,10 @@ if (test $1 = "MANYBODY") then
|
||||
depend OPENMP
|
||||
fi
|
||||
|
||||
if (test $1 = "MEAM") then
|
||||
depend KOKKOS
|
||||
fi
|
||||
|
||||
if (test $1 = "MOLECULE") then
|
||||
depend EXTRA-MOLECULE
|
||||
depend GPU
|
||||
|
||||
@ -182,6 +182,13 @@ action kokkos_base.h
|
||||
action kokkos_base_fft.h fft3d.h
|
||||
action kokkos_few.h
|
||||
action kokkos_type.h
|
||||
action meam_kokkos.h meam.h
|
||||
action meam_dens_final_kokkos.h meam_dens_final.cpp
|
||||
action meam_dens_init_kokkos.h meam_dens_init.cpp
|
||||
action meam_force_kokkos.h meam_force.cpp
|
||||
action meam_funcs_kokkos.h meam_funcs.cpp
|
||||
action meam_impl_kokkos.h meam_impl.cpp
|
||||
action meam_setup_done_kokkos.h meam_setup_done.cpp
|
||||
action memory_kokkos.h
|
||||
action modify_kokkos.cpp
|
||||
action modify_kokkos.h
|
||||
@ -287,6 +294,8 @@ action pair_lj_gromacs_kokkos.cpp pair_lj_gromacs.cpp
|
||||
action pair_lj_gromacs_kokkos.h pair_lj_gromacs.h
|
||||
action pair_lj_sdk_kokkos.cpp pair_lj_sdk.cpp
|
||||
action pair_lj_sdk_kokkos.h pair_lj_sdk.h
|
||||
action pair_meam_kokkos.cpp pair_meam.cpp
|
||||
action pair_meam_kokkos.h pair_meam.h
|
||||
action pair_morse_kokkos.cpp
|
||||
action pair_morse_kokkos.h
|
||||
action pair_multi_lucy_rx_kokkos.cpp pair_multi_lucy_rx.cpp
|
||||
|
||||
@ -109,6 +109,7 @@ void CommKokkos::init()
|
||||
exchange_comm_classic = lmp->kokkos->exchange_comm_classic;
|
||||
forward_comm_classic = lmp->kokkos->forward_comm_classic;
|
||||
forward_pair_comm_classic = lmp->kokkos->forward_pair_comm_classic;
|
||||
reverse_pair_comm_classic = lmp->kokkos->reverse_pair_comm_classic;
|
||||
forward_fix_comm_classic = lmp->kokkos->forward_fix_comm_classic;
|
||||
reverse_comm_classic = lmp->kokkos->reverse_comm_classic;
|
||||
exchange_comm_on_host = lmp->kokkos->exchange_comm_on_host;
|
||||
@ -478,12 +479,13 @@ void CommKokkos::forward_comm_device(Pair *pair)
|
||||
int nsize = pair->comm_forward;
|
||||
KokkosBase* pairKKBase = dynamic_cast<KokkosBase*>(pair);
|
||||
|
||||
int nmax = max_buf_pair;
|
||||
for (iswap = 0; iswap < nswap; iswap++) {
|
||||
int n = MAX(max_buf_pair,nsize*sendnum[iswap]);
|
||||
n = MAX(n,nsize*recvnum[iswap]);
|
||||
if (n > max_buf_pair)
|
||||
grow_buf_pair(n);
|
||||
nmax = MAX(nmax,nsize*sendnum[iswap]);
|
||||
nmax = MAX(nmax,nsize*recvnum[iswap]);
|
||||
}
|
||||
if (nmax > max_buf_pair)
|
||||
grow_buf_pair(nmax);
|
||||
|
||||
for (iswap = 0; iswap < nswap; iswap++) {
|
||||
|
||||
@ -545,8 +547,76 @@ void CommKokkos::grow_buf_fix(int n) {
|
||||
|
||||
void CommKokkos::reverse_comm(Pair *pair)
|
||||
{
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::reverse_comm(pair);
|
||||
if (pair->execution_space == Host || !pair->reverse_comm_device || reverse_pair_comm_classic) {
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::reverse_comm(pair);
|
||||
} else {
|
||||
k_sendlist.sync<LMPDeviceType>();
|
||||
reverse_comm_device<LMPDeviceType>(pair);
|
||||
}
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
void CommKokkos::reverse_comm_device(Pair *pair)
|
||||
{
|
||||
int iswap,n;
|
||||
MPI_Request request;
|
||||
DAT::tdual_xfloat_1d k_buf_tmp;
|
||||
|
||||
KokkosBase* pairKKBase = dynamic_cast<KokkosBase*>(pair);
|
||||
|
||||
int nsize = MAX(pair->comm_reverse,pair->comm_reverse_off);
|
||||
|
||||
int nmax = max_buf_pair;
|
||||
for (iswap = 0; iswap < nswap; iswap++) {
|
||||
nmax = MAX(nmax,nsize*sendnum[iswap]);
|
||||
nmax = MAX(nmax,nsize*recvnum[iswap]);
|
||||
}
|
||||
if (nmax > max_buf_pair)
|
||||
grow_buf_pair(nmax);
|
||||
|
||||
for (iswap = nswap-1; iswap >= 0; iswap--) {
|
||||
|
||||
// pack buffer
|
||||
|
||||
n = pairKKBase->pack_reverse_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send_pair);
|
||||
DeviceType().fence();
|
||||
|
||||
// exchange with another proc
|
||||
// if self, set recv buffer to send buffer
|
||||
|
||||
double* buf_send_pair;
|
||||
double* buf_recv_pair;
|
||||
if (lmp->kokkos->gpu_aware_flag) {
|
||||
buf_send_pair = k_buf_send_pair.view<DeviceType>().data();
|
||||
buf_recv_pair = k_buf_recv_pair.view<DeviceType>().data();
|
||||
} else {
|
||||
k_buf_send_pair.modify<DeviceType>();
|
||||
k_buf_send_pair.sync<LMPHostType>();
|
||||
buf_send_pair = k_buf_send_pair.h_view.data();
|
||||
buf_recv_pair = k_buf_recv_pair.h_view.data();
|
||||
}
|
||||
|
||||
if (sendproc[iswap] != me) {
|
||||
if (sendnum[iswap])
|
||||
MPI_Irecv(buf_recv_pair,nsize*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world,&request);
|
||||
if (recvnum[iswap])
|
||||
MPI_Send(buf_send_pair,n,MPI_DOUBLE,recvproc[iswap],0,world);
|
||||
if (sendnum[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
|
||||
if (!lmp->kokkos->gpu_aware_flag) {
|
||||
k_buf_recv_pair.modify<LMPHostType>();
|
||||
k_buf_recv_pair.sync<DeviceType>();
|
||||
}
|
||||
k_buf_tmp = k_buf_recv_pair;
|
||||
} else k_buf_tmp = k_buf_send_pair;
|
||||
|
||||
// unpack buffer
|
||||
|
||||
pairKKBase->unpack_reverse_comm_kokkos(sendnum[iswap],k_sendlist,
|
||||
iswap,k_buf_tmp);
|
||||
DeviceType().fence();
|
||||
}
|
||||
}
|
||||
|
||||
void CommKokkos::forward_comm(Dump *dump)
|
||||
|
||||
@ -27,6 +27,7 @@ class CommKokkos : public CommBrick {
|
||||
bool exchange_comm_classic;
|
||||
bool forward_comm_classic;
|
||||
bool forward_pair_comm_classic;
|
||||
bool reverse_pair_comm_classic;
|
||||
bool forward_fix_comm_classic;
|
||||
bool reverse_comm_classic;
|
||||
bool exchange_comm_on_host;
|
||||
@ -58,6 +59,7 @@ class CommKokkos : public CommBrick {
|
||||
template<class DeviceType> void forward_comm_device(int dummy);
|
||||
template<class DeviceType> void reverse_comm_device();
|
||||
template<class DeviceType> void forward_comm_device(Pair *pair);
|
||||
template<class DeviceType> void reverse_comm_device(Pair *pair);
|
||||
template<class DeviceType> void forward_comm_device(Fix *fix, int size=0);
|
||||
template<class DeviceType> void exchange_device();
|
||||
template<class DeviceType> void borders_device();
|
||||
|
||||
@ -91,6 +91,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
exchange_comm_changed = 0;
|
||||
forward_comm_changed = 0;
|
||||
forward_pair_comm_changed = 0;
|
||||
reverse_pair_comm_changed = 0;
|
||||
forward_fix_comm_changed = 0;
|
||||
reverse_comm_changed = 0;
|
||||
|
||||
@ -239,7 +240,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
newtonflag = 0;
|
||||
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 0;
|
||||
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 0;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
} else {
|
||||
@ -253,7 +254,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
newtonflag = 1;
|
||||
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1;
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 1;
|
||||
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 1;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
}
|
||||
@ -394,17 +395,17 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"no") == 0) {
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1;
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 1;
|
||||
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 1;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
} else if (strcmp(arg[iarg+1],"host") == 0) {
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 1;
|
||||
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 1;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 1;
|
||||
} else if (strcmp(arg[iarg+1],"device") == 0) {
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 0;
|
||||
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 0;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
} else error->all(FLERR,"Illegal package kokkos command");
|
||||
@ -441,6 +442,14 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
else error->all(FLERR,"Illegal package kokkos command");
|
||||
forward_pair_comm_changed = 0;
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"comm/pair/reverse") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"no") == 0) reverse_pair_comm_classic = 1;
|
||||
else if (strcmp(arg[iarg+1],"host") == 0) reverse_pair_comm_classic = 1;
|
||||
else if (strcmp(arg[iarg+1],"device") == 0) reverse_pair_comm_classic = 0;
|
||||
else error->all(FLERR,"Illegal package kokkos command");
|
||||
reverse_pair_comm_changed = 0;
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"comm/fix/forward") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"no") == 0) forward_fix_comm_classic = 1;
|
||||
@ -515,6 +524,10 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
forward_pair_comm_classic = 1;
|
||||
forward_pair_comm_changed = 1;
|
||||
}
|
||||
if (reverse_pair_comm_classic == 0) {
|
||||
reverse_pair_comm_classic = 1;
|
||||
reverse_pair_comm_changed = 1;
|
||||
}
|
||||
if (forward_fix_comm_classic == 0) {
|
||||
forward_fix_comm_classic = 1;
|
||||
forward_fix_comm_changed = 1;
|
||||
@ -540,6 +553,10 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
forward_pair_comm_classic = 0;
|
||||
forward_pair_comm_changed = 0;
|
||||
}
|
||||
if (reverse_pair_comm_changed) {
|
||||
reverse_pair_comm_classic = 0;
|
||||
reverse_pair_comm_changed = 0;
|
||||
}
|
||||
if (forward_fix_comm_changed) {
|
||||
forward_fix_comm_classic = 0;
|
||||
forward_fix_comm_changed = 0;
|
||||
|
||||
@ -30,6 +30,7 @@ class KokkosLMP : protected Pointers {
|
||||
int exchange_comm_classic;
|
||||
int forward_comm_classic;
|
||||
int forward_pair_comm_classic;
|
||||
int reverse_pair_comm_classic;
|
||||
int forward_fix_comm_classic;
|
||||
int reverse_comm_classic;
|
||||
int exchange_comm_on_host;
|
||||
@ -38,6 +39,7 @@ class KokkosLMP : protected Pointers {
|
||||
int exchange_comm_changed;
|
||||
int forward_comm_changed;
|
||||
int forward_pair_comm_changed;
|
||||
int reverse_pair_comm_changed;
|
||||
int forward_fix_comm_changed;
|
||||
int reverse_comm_changed;
|
||||
int nthreads,ngpus;
|
||||
|
||||
@ -29,6 +29,10 @@ class KokkosBase {
|
||||
int, int *) {return 0;};
|
||||
virtual void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d &) {}
|
||||
|
||||
virtual int pack_reverse_comm_kokkos(int, int, DAT::tdual_xfloat_1d &) {return 0;};
|
||||
virtual void unpack_reverse_comm_kokkos(int, DAT::tdual_int_2d,
|
||||
int, DAT::tdual_xfloat_1d &) {}
|
||||
|
||||
// Fix
|
||||
virtual int pack_forward_comm_fix_kokkos(int, DAT::tdual_int_2d,
|
||||
int, DAT::tdual_xfloat_1d &,
|
||||
|
||||
@ -477,59 +477,3 @@ double MathSpecialKokkos::erfcx_y100(const double y100)
|
||||
return 1.0;
|
||||
} /* erfcx_y100 */
|
||||
|
||||
/* optimizer friendly implementation of exp2(x).
|
||||
*
|
||||
* strategy:
|
||||
*
|
||||
* split argument into an integer part and a fraction:
|
||||
* ipart = floor(x+0.5);
|
||||
* fpart = x - ipart;
|
||||
*
|
||||
* compute exp2(ipart) from setting the ieee754 exponent
|
||||
* compute exp2(fpart) using a pade' approximation for x in [-0.5;0.5[
|
||||
*
|
||||
* the result becomes: exp2(x) = exp2(ipart) * exp2(fpart)
|
||||
*/
|
||||
|
||||
/* IEEE 754 double precision floating point data manipulation */
|
||||
typedef union
|
||||
{
|
||||
double f;
|
||||
uint64_t u;
|
||||
struct {int32_t i0,i1;} s;
|
||||
} udi_t;
|
||||
|
||||
static const double fm_exp2_q[] = {
|
||||
/* 1.00000000000000000000e0, */
|
||||
2.33184211722314911771e2,
|
||||
4.36821166879210612817e3
|
||||
};
|
||||
static const double fm_exp2_p[] = {
|
||||
2.30933477057345225087e-2,
|
||||
2.02020656693165307700e1,
|
||||
1.51390680115615096133e3
|
||||
};
|
||||
|
||||
double MathSpecialKokkos::exp2_x86(double x)
|
||||
{
|
||||
double ipart, fpart, px, qx;
|
||||
udi_t epart;
|
||||
|
||||
ipart = floor(x+0.5);
|
||||
fpart = x - ipart;
|
||||
epart.s.i0 = 0;
|
||||
epart.s.i1 = (((int) ipart) + 1023) << 20;
|
||||
|
||||
x = fpart*fpart;
|
||||
|
||||
px = fm_exp2_p[0];
|
||||
px = px*x + fm_exp2_p[1];
|
||||
qx = x + fm_exp2_q[0];
|
||||
px = px*x + fm_exp2_p[2];
|
||||
qx = qx*x + fm_exp2_q[1];
|
||||
|
||||
px = px * fpart;
|
||||
|
||||
x = 1.0 + 2.0*(px/(qx-px));
|
||||
return epart.f*x;
|
||||
}
|
||||
|
||||
@ -22,79 +22,233 @@ namespace LAMMPS_NS {
|
||||
|
||||
namespace MathSpecialKokkos {
|
||||
|
||||
/*! Fast tabulated factorial function
|
||||
*
|
||||
* This function looks up pre-computed factorial values for arguments of n = 0
|
||||
* to a maximum of 167, which is the maximal value representable by a double
|
||||
* precision floating point number. For other values of n a NaN value is returned.
|
||||
*
|
||||
* \param n argument (valid: 0 <= n <= 167)
|
||||
* \return value of n! as double precision number or NaN */
|
||||
|
||||
extern double factorial(const int n);
|
||||
|
||||
/* optimizer friendly implementation of exp2(x).
|
||||
*
|
||||
* strategy:
|
||||
*
|
||||
* split argument into an integer part and a fraction:
|
||||
* ipart = floor(x+0.5);
|
||||
* fpart = x - ipart;
|
||||
*
|
||||
* compute exp2(ipart) from setting the ieee754 exponent
|
||||
* compute exp2(fpart) using a pade' approximation for x in [-0.5;0.5[
|
||||
*
|
||||
* the result becomes: exp2(x) = exp2(ipart) * exp2(fpart)
|
||||
*/
|
||||
|
||||
/* IEEE 754 double precision floating point data manipulation */
|
||||
typedef union
|
||||
{
|
||||
double f;
|
||||
uint64_t u;
|
||||
struct {int32_t i0,i1;} s;
|
||||
} udi_t;
|
||||
|
||||
/* double precision constants */
|
||||
#define FM_DOUBLE_LOG2OFE 1.4426950408889634074
|
||||
|
||||
/*! Fast implementation of 2^x without argument checks for little endian CPUs
|
||||
*
|
||||
* This function implements an optimized version of pow(2.0, x) that does not
|
||||
* check for valid arguments and thus may only be used where arguments are well
|
||||
* behaved. The implementation makes assumptions about the layout of double
|
||||
* precision floating point numbers in memory and thus will only work on little
|
||||
* endian CPUs. If little endian cannot be safely detected, the result of
|
||||
* calling pow(2.0, x) will be returned. This function also is the basis for
|
||||
* the fast exponential fm_exp(x).
|
||||
*
|
||||
* \param x argument
|
||||
* \return value of 2^x as double precision number */
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double exp2_x86(double x)
|
||||
{
|
||||
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||
double ipart, fpart, px, qx;
|
||||
udi_t epart;
|
||||
|
||||
const double fm_exp2_q[2] = {
|
||||
/* 1.00000000000000000000e0, */
|
||||
2.33184211722314911771e2,
|
||||
4.36821166879210612817e3
|
||||
};
|
||||
const double fm_exp2_p[3] = {
|
||||
2.30933477057345225087e-2,
|
||||
2.02020656693165307700e1,
|
||||
1.51390680115615096133e3
|
||||
};
|
||||
|
||||
ipart = floor(x+0.5);
|
||||
fpart = x - ipart;
|
||||
epart.s.i0 = 0;
|
||||
epart.s.i1 = (((int) ipart) + 1023) << 20;
|
||||
|
||||
x = fpart*fpart;
|
||||
|
||||
px = fm_exp2_p[0];
|
||||
px = px*x + fm_exp2_p[1];
|
||||
qx = x + fm_exp2_q[0];
|
||||
px = px*x + fm_exp2_p[2];
|
||||
qx = qx*x + fm_exp2_q[1];
|
||||
|
||||
px = px * fpart;
|
||||
|
||||
x = 1.0 + 2.0*(px/(qx-px));
|
||||
return epart.f*x;
|
||||
#else
|
||||
return pow(2.0, x);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! Fast implementation of exp(x) for little endian CPUs
|
||||
*
|
||||
* This function implements an optimized version of exp(x) for little endian CPUs.
|
||||
* It calls the exp2_x86(x) function with a suitable prefactor to x to return exp(x).
|
||||
* The implementation makes assumptions about the layout of double
|
||||
* precision floating point numbers in memory and thus will only work on little
|
||||
* endian CPUs. If little endian cannot be safely detected, the result of
|
||||
* calling the exp(x) implementation in the standard math library will be returned.
|
||||
*
|
||||
* \param x argument
|
||||
* \return value of e^x as double precision number */
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double fm_exp(double x)
|
||||
{
|
||||
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||
if (x < -1022.0/FM_DOUBLE_LOG2OFE) return 0;
|
||||
if (x > 1023.0/FM_DOUBLE_LOG2OFE) return INFINITY;
|
||||
return exp2_x86(FM_DOUBLE_LOG2OFE * x);
|
||||
#else
|
||||
return ::exp(x);
|
||||
#endif
|
||||
}
|
||||
|
||||
// support function for scaled error function complement
|
||||
|
||||
extern double erfcx_y100(const double y100);
|
||||
|
||||
// fast 2**x function without argument checks for little endian CPUs
|
||||
extern double exp2_x86(double x);
|
||||
|
||||
// scaled error function complement exp(x*x)*erfc(x) for coul/long styles
|
||||
/*! Fast scaled error function complement exp(x*x)*erfc(x) for coul/long styles
|
||||
*
|
||||
* This is a portable fast implementation of exp(x*x)*erfc(x) that can be used
|
||||
* in coul/long pair styles as a replacement for the polynomial expansion that
|
||||
* is/was widely used. Unlike the polynomial expansion, that is only accurate
|
||||
* at the level of single precision floating point it provides full double precision
|
||||
* accuracy, but at comparable speed (unlike the erfc() implementation shipped
|
||||
* with GNU standard math library).
|
||||
*
|
||||
* \param x argument
|
||||
* \return value of e^(x*x)*erfc(x) */
|
||||
|
||||
static inline double my_erfcx(const double x)
|
||||
{
|
||||
if (x >= 0.0) return erfcx_y100(400.0/(4.0+x));
|
||||
else return 2.0*exp(x*x) - erfcx_y100(400.0/(4.0-x));
|
||||
if (x >= 0.0)
|
||||
return erfcx_y100(400.0 / (4.0 + x));
|
||||
else
|
||||
return 2.0 * exp(x * x) - erfcx_y100(400.0 / (4.0 - x));
|
||||
}
|
||||
|
||||
// exp(-x*x) for coul/long styles
|
||||
/*! Fast implementation of exp(-x*x) for little endian CPUs for coul/long styles
|
||||
*
|
||||
* This function implements an optimized version of exp(-x*x) based on exp2_x86()
|
||||
* for use with little endian CPUs. If little endian cannot be safely detected,
|
||||
* the result of calling the exp(-x*x) implementation in the standard math
|
||||
* library will be returned.
|
||||
*
|
||||
* \param x argument
|
||||
* \return value of e^(-x*x) as double precision number */
|
||||
|
||||
static inline double expmsq(double x)
|
||||
{
|
||||
x *= x;
|
||||
x *= 1.4426950408889634074; // log_2(e)
|
||||
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
return (x < 1023.0) ? exp2_x86(-x) : 0.0;
|
||||
#else
|
||||
return (x < 1023.0) ? exp2(-x) : 0.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
// x**2, use instead of pow(x,2.0)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double square(const double &x) { return x*x; }
|
||||
/*! Fast inline version of pow(x, 2.0)
|
||||
*
|
||||
* \param x argument
|
||||
* \return x*x */
|
||||
|
||||
// x**3, use instead of pow(x,3.0)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double cube(const double &x) { return x*x*x; }
|
||||
static double square(const double &x) { return x * x; }
|
||||
|
||||
/*! Fast inline version of pow(x, 3.0)
|
||||
*
|
||||
* \param x argument
|
||||
* \return x*x */
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double cube(const double &x) { return x * x * x; }
|
||||
|
||||
/* Fast inline version of pow(-1.0, n)
|
||||
*
|
||||
* \param n argument (integer)
|
||||
* \return -1 if n is odd, 1.0 if n is even */
|
||||
|
||||
// return -1.0 for odd n, 1.0 for even n, like pow(-1.0,n)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double powsign(const int n) { return (n & 1) ? -1.0 : 1.0; }
|
||||
|
||||
// optimized version of pow(x,n) with n being integer
|
||||
// up to 10x faster than pow(x,y)
|
||||
/* Fast inline version of pow(x,n) for integer n
|
||||
*
|
||||
* This is a version of pow(x,n) optimized for n being integer.
|
||||
* Speedups of up to 10x faster than pow(x,y) have been measured.
|
||||
*
|
||||
* \param n argument (integer)
|
||||
* \return value of x^n */
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double powint(const double &x, const int n) {
|
||||
double yy,ww;
|
||||
static double powint(const double &x, const int n)
|
||||
{
|
||||
double yy, ww;
|
||||
|
||||
if (x == 0.0) return 0.0;
|
||||
int nn = (n > 0) ? n : -n;
|
||||
ww = x;
|
||||
|
||||
for (yy = 1.0; nn != 0; nn >>= 1, ww *=ww)
|
||||
for (yy = 1.0; nn != 0; nn >>= 1, ww *= ww)
|
||||
if (nn & 1) yy *= ww;
|
||||
|
||||
return (n > 0) ? yy : 1.0/yy;
|
||||
return (n > 0) ? yy : 1.0 / yy;
|
||||
}
|
||||
|
||||
// optimized version of (sin(x)/x)**n with n being a _positive_ integer
|
||||
/* Fast inline version of (sin(x)/x)^n as used by PPPM kspace styles
|
||||
*
|
||||
* This is an optimized function to compute (sin(x)/x)^n as frequently used by PPPM.
|
||||
*
|
||||
* \param n argument (integer). Expected to be positive.
|
||||
* \return value of (sin(x)/x)^n */
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double powsinxx(const double &x, int n) {
|
||||
double yy,ww;
|
||||
static double powsinxx(const double &x, int n)
|
||||
{
|
||||
double yy, ww;
|
||||
|
||||
if (x == 0.0) return 1.0;
|
||||
|
||||
ww = sin(x)/x;
|
||||
ww = sin(x) / x;
|
||||
|
||||
for (yy = 1.0; n != 0; n >>= 1, ww *=ww)
|
||||
for (yy = 1.0; n != 0; n >>= 1, ww *= ww)
|
||||
if (n & 1) yy *= ww;
|
||||
|
||||
return yy;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace MathSpecialKokkos
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
#endif
|
||||
|
||||
164
src/KOKKOS/meam_dens_final_kokkos.h
Normal file
164
src/KOKKOS/meam_dens_final_kokkos.h
Normal file
@ -0,0 +1,164 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "meam_kokkos.h"
|
||||
#include "math_special.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void
|
||||
MEAMKokkos<DeviceType>::meam_dens_final(int nlocal, int eflag_either, int eflag_global, int eflag_atom,
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d eatom, int ntype, typename AT::t_int_1d type, typename AT::t_int_1d d_map, typename AT::t_int_2d d_scale, int& errorflag, EV_FLOAT &ev_all)
|
||||
{
|
||||
EV_FLOAT ev;
|
||||
this->eflag_either = eflag_either;
|
||||
this->eflag_global = eflag_global;
|
||||
this->eflag_atom = eflag_atom;
|
||||
this->d_eatom = eatom;
|
||||
this->ntype = ntype;
|
||||
this->type = type;
|
||||
this->d_map = d_map;
|
||||
this->d_scale = d_scale;
|
||||
|
||||
Kokkos::deep_copy(d_errorflag,0);
|
||||
|
||||
// Complete the calculation of density
|
||||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagMEAMDensFinal>(0,nlocal),*this,ev);
|
||||
ev_all.evdwl += ev.evdwl;
|
||||
copymode = 0;
|
||||
|
||||
auto h_errorflag = Kokkos::create_mirror_view_and_copy(LMPHostType(),d_errorflag);
|
||||
errorflag = h_errorflag();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void MEAMKokkos<DeviceType>::operator()(TagMEAMDensFinal, const int &i, EV_FLOAT& ev) const {
|
||||
|
||||
F_FLOAT rhob, G, dG, Gbar, dGbar, gam, shp[3], Z;
|
||||
F_FLOAT denom, rho_bkgd, Fl;
|
||||
double scaleii;
|
||||
|
||||
int elti = d_map[type[i]];
|
||||
if (elti >= 0) {
|
||||
scaleii = d_scale(type[i],type[i]);
|
||||
d_rho1[i] = 0.0;
|
||||
d_rho2[i] = -1.0 / 3.0 * d_arho2b[i] * d_arho2b[i];
|
||||
d_rho3[i] = 0.0;
|
||||
for (int m = 0; m < 3; m++) {
|
||||
d_rho1[i] += d_arho1(i,m) * d_arho1(i,m);
|
||||
d_rho3[i] -= 3.0 / 5.0 * d_arho3b(i,m) * d_arho3b(i,m);
|
||||
}
|
||||
for (int m = 0; m < 6; m++)
|
||||
d_rho2[i] += v2D[m] * d_arho2(i,m) * d_arho2(i,m);
|
||||
for (int m = 0; m < 10; m++)
|
||||
d_rho3[i] += v3D[m] * d_arho3(i,m) * d_arho3(i,m);
|
||||
|
||||
if (d_rho0[i] > 0.0) {
|
||||
if (ialloy == 1) {
|
||||
d_t_ave(i,0) = fdiv_zero_kk(d_t_ave(i,0), d_tsq_ave(i,0));
|
||||
d_t_ave(i,1) = fdiv_zero_kk(d_t_ave(i,1), d_tsq_ave(i,1));
|
||||
d_t_ave(i,2) = fdiv_zero_kk(d_t_ave(i,2), d_tsq_ave(i,2));
|
||||
} else if (ialloy == 2) {
|
||||
d_t_ave(i,0) = t1_meam[elti];
|
||||
d_t_ave(i,1) = t2_meam[elti];
|
||||
d_t_ave(i,2) = t3_meam[elti];
|
||||
} else {
|
||||
d_t_ave(i,0) /= d_rho0[i];
|
||||
d_t_ave(i,1) /= d_rho0[i];
|
||||
d_t_ave(i,2) /= d_rho0[i];
|
||||
}
|
||||
}
|
||||
|
||||
d_gamma[i] = d_t_ave(i,0) * d_rho1[i] + d_t_ave(i,1) * d_rho2[i] + d_t_ave(i,2) * d_rho3[i];
|
||||
|
||||
if (d_rho0[i] > 0.0)
|
||||
d_gamma[i] /= (d_rho0[i] * d_rho0[i]);
|
||||
|
||||
Z = get_Zij(lattce_meam[elti][elti]);
|
||||
|
||||
G = G_gam(d_gamma[i], ibar_meam[elti], d_errorflag());
|
||||
if (d_errorflag() != 0)
|
||||
return;
|
||||
|
||||
get_shpfcn(lattce_meam[elti][elti], stheta_meam[elti][elti], ctheta_meam[elti][elti], shp);
|
||||
if (ibar_meam[elti] <= 0) {
|
||||
Gbar = 1.0;
|
||||
dGbar = 0.0;
|
||||
} else {
|
||||
if (mix_ref_t == 1)
|
||||
gam = (d_t_ave(i,0) * shp[0] + d_t_ave(i,1) * shp[1] + d_t_ave(i,2) * shp[2]) / (Z * Z);
|
||||
else
|
||||
gam = (t1_meam[elti] * shp[0] + t2_meam[elti] * shp[1] + t3_meam[elti] * shp[2]) /
|
||||
(Z * Z);
|
||||
Gbar = G_gam(gam, ibar_meam[elti], d_errorflag());
|
||||
}
|
||||
d_rho[i] = d_rho0[i] * G;
|
||||
|
||||
if (mix_ref_t == 1) {
|
||||
if (ibar_meam[elti] <= 0) {
|
||||
Gbar = 1.0;
|
||||
dGbar = 0.0;
|
||||
} else {
|
||||
gam = (d_t_ave(i,0) * shp[0] + d_t_ave(i,1) * shp[1] + d_t_ave(i,2) * shp[2]) / (Z * Z);
|
||||
Gbar = dG_gam(gam, ibar_meam[elti], dGbar);
|
||||
}
|
||||
rho_bkgd = rho0_meam[elti] * Z * Gbar;
|
||||
} else {
|
||||
if (bkgd_dyn == 1)
|
||||
rho_bkgd = rho0_meam[elti] * Z;
|
||||
else
|
||||
rho_bkgd = rho_ref_meam[elti];
|
||||
}
|
||||
rhob = d_rho[i] / rho_bkgd;
|
||||
denom = 1.0 / rho_bkgd;
|
||||
|
||||
G = dG_gam(d_gamma[i], ibar_meam[elti], dG);
|
||||
|
||||
d_dgamma1[i] = (G - 2 * dG * d_gamma[i]) * denom;
|
||||
|
||||
if (!iszero_kk(d_rho0[i]))
|
||||
d_dgamma2[i] = (dG / d_rho0[i]) * denom;
|
||||
else
|
||||
d_dgamma2[i] = 0.0;
|
||||
|
||||
// dgamma3 is nonzero only if we are using the "mixed" rule for
|
||||
// computing t in the reference system (which is not correct, but
|
||||
// included for backward compatibility
|
||||
if (mix_ref_t == 1)
|
||||
d_dgamma3[i] = d_rho0[i] * G * dGbar / (Gbar * Z * Z) * denom;
|
||||
else
|
||||
d_dgamma3[i] = 0.0;
|
||||
|
||||
Fl = embedding(A_meam[elti], Ec_meam[elti][elti], rhob, d_frhop[i]);
|
||||
|
||||
if (eflag_either) {
|
||||
Fl *= scaleii;
|
||||
if (eflag_global) {
|
||||
ev.evdwl += Fl;
|
||||
}
|
||||
if (eflag_atom) {
|
||||
d_eatom[i] += Fl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
602
src/KOKKOS/meam_dens_init_kokkos.h
Normal file
602
src/KOKKOS/meam_dens_init_kokkos.h
Normal file
@ -0,0 +1,602 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "meam_kokkos.h"
|
||||
#include "math_special_kokkos.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathSpecialKokkos;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void MEAMKokkos<DeviceType>::operator()(TagMEAMDensInit<NEIGHFLAG>, const int &i) const {
|
||||
int ii, offsetval;
|
||||
ii = d_ilist_half[i];
|
||||
offsetval = d_offset[i];
|
||||
// compute screening function and derivatives
|
||||
this->template getscreen<NEIGHFLAG>(ii, offsetval, x, d_numneigh_half,
|
||||
d_numneigh_full, ntype, type, d_map);
|
||||
|
||||
// calculate intermediate density terms to be communicated
|
||||
this->template calc_rho1<NEIGHFLAG>(ii, ntype, type, d_map, x, d_numneigh_half, offsetval);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void MEAMKokkos<DeviceType>::operator()(TagMEAMZero, const int &i) const {
|
||||
d_rho0[i] = 0.0;
|
||||
d_arho2b[i] = 0.0;
|
||||
d_arho1(i,0) = d_arho1(i,1) = d_arho1(i,2) = 0.0;
|
||||
for (int j = 0; j < 6; j++)
|
||||
d_arho2(i,j) = 0.0;
|
||||
for (int j = 0; j < 10; j++)
|
||||
d_arho3(i,j) = 0.0;
|
||||
d_arho3b(i,0) = d_arho3b(i,1) = d_arho3b(i,2) = 0.0;
|
||||
d_t_ave(i,0) = d_t_ave(i,1) = d_t_ave(i,2) = 0.0;
|
||||
d_tsq_ave(i,0) = d_tsq_ave(i,1) = d_tsq_ave(i,2) = 0.0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void
|
||||
MEAMKokkos<DeviceType>::meam_dens_setup(int atom_nmax, int nall, int n_neigh)
|
||||
{
|
||||
MemoryKokkos *memoryKK = (MemoryKokkos *)memory;
|
||||
|
||||
// grow local arrays if necessary
|
||||
|
||||
if (atom_nmax > nmax) {
|
||||
memoryKK->destroy_kokkos(k_rho,rho);
|
||||
memoryKK->destroy_kokkos(k_rho0,rho0);
|
||||
memoryKK->destroy_kokkos(k_rho1,rho1);
|
||||
memoryKK->destroy_kokkos(k_rho2,rho2);
|
||||
memoryKK->destroy_kokkos(k_rho3,rho3);
|
||||
memoryKK->destroy_kokkos(k_frhop,frhop);
|
||||
memoryKK->destroy_kokkos(k_gamma,gamma);
|
||||
memoryKK->destroy_kokkos(k_dgamma1,dgamma1);
|
||||
memoryKK->destroy_kokkos(k_dgamma2,dgamma2);
|
||||
memoryKK->destroy_kokkos(k_dgamma3,dgamma3);
|
||||
memoryKK->destroy_kokkos(k_arho2b,arho2b);
|
||||
memoryKK->destroy_kokkos(k_arho1,arho1);
|
||||
memoryKK->destroy_kokkos(k_arho2,arho2);
|
||||
memoryKK->destroy_kokkos(k_arho3,arho3);
|
||||
memoryKK->destroy_kokkos(k_arho3b,arho3b);
|
||||
memoryKK->destroy_kokkos(k_t_ave,t_ave);
|
||||
memoryKK->destroy_kokkos(k_tsq_ave,tsq_ave);
|
||||
|
||||
nmax = atom_nmax;
|
||||
// memory->create(rho, nmax, "pair:rho");
|
||||
k_rho = DAT::tdual_ffloat_1d("pair:rho",nmax);
|
||||
d_rho = k_rho.template view<DeviceType>();
|
||||
h_rho = k_rho.h_view;
|
||||
// memory->create(rho0, nmax, "pair:rho0");
|
||||
k_rho0 = DAT::tdual_ffloat_1d("pair:rho0",nmax);
|
||||
d_rho0 = k_rho0.template view<DeviceType>();
|
||||
h_rho0 = k_rho0.h_view;
|
||||
//memory->create(rho1, nmax, "pair:rho1");
|
||||
k_rho1 = DAT::tdual_ffloat_1d("pair:rho1",nmax);
|
||||
d_rho1 = k_rho1.template view<DeviceType>();
|
||||
h_rho1 = k_rho1.h_view;
|
||||
//memory->create(rho2, nmax, "pair:rho2");
|
||||
k_rho2 = DAT::tdual_ffloat_1d("pair:rho2",nmax);
|
||||
d_rho2 = k_rho2.template view<DeviceType>();
|
||||
h_rho2 = k_rho2.h_view;
|
||||
//memory->create(rho3, nmax, "pair:rho3");
|
||||
k_rho3 = DAT::tdual_ffloat_1d("pair:rho3",nmax);
|
||||
d_rho3 = k_rho3.template view<DeviceType>();
|
||||
h_rho3 = k_rho3.h_view;
|
||||
//memory->create(frhop, nmax, "pair:frhop");
|
||||
k_frhop = DAT::tdual_ffloat_1d("pair:frhop",nmax);
|
||||
d_frhop = k_frhop.template view<DeviceType>();
|
||||
h_frhop = k_frhop.h_view;
|
||||
//memory->create(gamma, nmax, "pair:gamma");
|
||||
k_gamma = DAT::tdual_ffloat_1d("pair:gamma",nmax);
|
||||
d_gamma = k_gamma.template view<DeviceType>();
|
||||
h_gamma = k_gamma.h_view;
|
||||
//memory->create(dgamma1, nmax, "pair:dgamma1");
|
||||
k_dgamma1 = DAT::tdual_ffloat_1d("pair:dgamma1",nmax);
|
||||
d_dgamma1 = k_dgamma1.template view<DeviceType>();
|
||||
h_dgamma1 = k_dgamma1.h_view;
|
||||
//memory->create(dgamma2, nmax, "pair:dgamma2");
|
||||
k_dgamma2 = DAT::tdual_ffloat_1d("pair:dgamma2",nmax);
|
||||
d_dgamma2 = k_dgamma2.template view<DeviceType>();
|
||||
h_dgamma2 = k_dgamma2.h_view;
|
||||
//memory->create(dgamma3, nmax, "pair:dgamma3");
|
||||
k_dgamma3 = DAT::tdual_ffloat_1d("pair:dgamma3",nmax);
|
||||
d_dgamma3 = k_dgamma3.template view<DeviceType>();
|
||||
h_dgamma3 = k_dgamma3.h_view;
|
||||
//memory->create(arho2b, nmax, "pair:arho2b");
|
||||
k_arho2b = DAT::tdual_ffloat_1d("pair:arho2b",nmax);
|
||||
d_arho2b = k_arho2b.template view<DeviceType>();
|
||||
h_arho2b = k_arho2b.h_view;
|
||||
//memory->create(arho1, nmax, 3, "pair:arho1");
|
||||
k_arho1 = DAT::tdual_ffloat_2d("pair:arho1",nmax, 3);
|
||||
d_arho1 = k_arho1.template view<DeviceType>();
|
||||
h_arho1 = k_arho1.h_view;
|
||||
//memory->create(arho2, nmax, 6, "pair:arho2");
|
||||
k_arho2 = DAT::tdual_ffloat_2d("pair:arho2",nmax, 6);
|
||||
d_arho2 = k_arho2.template view<DeviceType>();
|
||||
h_arho2 = k_arho2.h_view;
|
||||
//memory->create(arho3, nmax, 10, "pair:arho3");
|
||||
k_arho3 = DAT::tdual_ffloat_2d("pair:arho3",nmax, 10);
|
||||
d_arho3 = k_arho3.template view<DeviceType>();
|
||||
h_arho3 = k_arho3.h_view;
|
||||
//memory->create(arho3b, nmax, 3, "pair:arho3b");
|
||||
k_arho3b = DAT::tdual_ffloat_2d("pair:arho3b",nmax, 3);
|
||||
d_arho3b = k_arho3b.template view<DeviceType>();
|
||||
h_arho3b = k_arho3b.h_view;
|
||||
//memory->create(t_ave, nmax, 3, "pair:t_ave");
|
||||
k_t_ave = DAT::tdual_ffloat_2d("pair:t_ave",nmax, 3);
|
||||
d_t_ave = k_t_ave.template view<DeviceType>();
|
||||
h_t_ave = k_t_ave.h_view;
|
||||
//memory->create(tsq_ave, nmax, 3, "pair:tsq_ave");
|
||||
k_tsq_ave = DAT::tdual_ffloat_2d("pair:tsq_ave",nmax, 3);
|
||||
d_tsq_ave = k_tsq_ave.template view<DeviceType>();
|
||||
h_tsq_ave = k_tsq_ave.h_view;
|
||||
}
|
||||
|
||||
if (n_neigh > maxneigh) {
|
||||
memoryKK->destroy_kokkos(k_scrfcn,scrfcn);
|
||||
memoryKK->destroy_kokkos(k_dscrfcn,dscrfcn);
|
||||
memoryKK->destroy_kokkos(k_fcpair,fcpair);
|
||||
maxneigh = n_neigh;
|
||||
// memory->create(scrfcn, maxneigh, "pair:scrfcn");
|
||||
k_scrfcn = DAT::tdual_ffloat_1d("pair:scrfcn", maxneigh);
|
||||
d_scrfcn = k_scrfcn.template view<DeviceType>();
|
||||
h_scrfcn = k_scrfcn.h_view;
|
||||
//memory->create(dscrfcn, maxneigh, "pair:dscrfcn");
|
||||
k_dscrfcn = DAT::tdual_ffloat_1d("pair:dscrfcn", maxneigh);
|
||||
d_dscrfcn = k_dscrfcn.template view<DeviceType>();
|
||||
h_dscrfcn = k_dscrfcn.h_view;
|
||||
//memory->create(fcpair, maxneigh, "pair:fcpair");
|
||||
k_fcpair = DAT::tdual_ffloat_1d("pair:fcpair", maxneigh);
|
||||
d_fcpair = k_fcpair.template view<DeviceType>();
|
||||
h_fcpair = k_fcpair.h_view;
|
||||
}
|
||||
|
||||
// zero out local arrays
|
||||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagMEAMZero>(0, nall),*this);
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void
|
||||
MEAMKokkos<DeviceType>::meam_dens_init(int inum_half, int ntype, typename AT::t_int_1d type, typename AT::t_int_1d d_map, typename AT::t_x_array x, typename AT::t_int_1d d_numneigh_half, typename AT::t_int_1d d_numneigh_full,
|
||||
typename AT::t_int_1d d_ilist_half, typename AT::t_neighbors_2d d_neighbors_half, typename AT::t_neighbors_2d d_neighbors_full, typename AT::t_int_1d d_offset, int neighflag, int need_dup)
|
||||
{
|
||||
this->ntype = ntype;
|
||||
this->type = type;
|
||||
this->d_map = d_map;
|
||||
this->x = x;
|
||||
this->d_numneigh_half = d_numneigh_half;
|
||||
this->d_numneigh_full = d_numneigh_full;
|
||||
this->d_ilist_half = d_ilist_half;
|
||||
this->d_neighbors_half = d_neighbors_half;
|
||||
this->d_neighbors_full = d_neighbors_full;
|
||||
this->d_offset = d_offset;
|
||||
this->nlocal = nlocal;
|
||||
|
||||
if (need_dup) {
|
||||
dup_rho0 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_rho0);
|
||||
dup_arho2b = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_arho2b);
|
||||
dup_arho1 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_arho1);
|
||||
dup_arho2 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_arho2);
|
||||
dup_arho3 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_arho3);
|
||||
dup_arho3b = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_arho3b);
|
||||
dup_t_ave = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_t_ave);
|
||||
dup_tsq_ave = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_tsq_ave);
|
||||
} else {
|
||||
ndup_rho0 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_rho0);
|
||||
ndup_arho2b = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_arho2b);
|
||||
ndup_arho1 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_arho1);
|
||||
ndup_arho2 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_arho2);
|
||||
ndup_arho3 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_arho3);
|
||||
ndup_arho3b = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_arho3b);
|
||||
ndup_t_ave = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_t_ave);
|
||||
ndup_tsq_ave = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_tsq_ave);
|
||||
}
|
||||
|
||||
copymode = 1;
|
||||
if (neighflag == HALF)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagMEAMDensInit<HALF> >(0,inum_half),*this);
|
||||
else if (neighflag == HALFTHREAD)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagMEAMDensInit<HALFTHREAD> >(0,inum_half),*this);
|
||||
copymode = 0;
|
||||
|
||||
if (need_dup) {
|
||||
Kokkos::Experimental::contribute(d_rho0, dup_rho0);
|
||||
Kokkos::Experimental::contribute(d_arho2b, dup_arho2b);
|
||||
Kokkos::Experimental::contribute(d_arho1, dup_arho1);
|
||||
Kokkos::Experimental::contribute(d_arho2, dup_arho2);
|
||||
Kokkos::Experimental::contribute(d_arho3, dup_arho3);
|
||||
Kokkos::Experimental::contribute(d_arho3b, dup_arho3b);
|
||||
Kokkos::Experimental::contribute(d_t_ave, dup_t_ave);
|
||||
Kokkos::Experimental::contribute(d_tsq_ave, dup_tsq_ave);
|
||||
|
||||
// free duplicated memory
|
||||
dup_rho0 = decltype(dup_rho0)();
|
||||
dup_arho2b = decltype(dup_arho2b)();
|
||||
dup_arho1 = decltype(dup_arho1)();
|
||||
dup_arho2 = decltype(dup_arho2)();
|
||||
dup_arho3 = decltype(dup_arho3)();
|
||||
dup_arho3b = decltype(dup_arho3b)();
|
||||
dup_t_ave = decltype(dup_t_ave)();
|
||||
dup_tsq_ave = decltype(dup_tsq_ave)();
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void
|
||||
MEAMKokkos<DeviceType>::getscreen(int i, int offset, typename AT::t_x_array x, typename AT::t_int_1d d_numneigh_half,
|
||||
typename AT::t_int_1d d_numneigh_full, int /*ntype*/, typename AT::t_int_1d type, typename AT::t_int_1d d_map)
|
||||
const {
|
||||
const double drinv = 1.0 / delr_meam;
|
||||
const int elti = d_map[type[i]];
|
||||
if (elti < 0) return;
|
||||
|
||||
const double xitmp = x(i,0);
|
||||
const double yitmp = x(i,1);
|
||||
const double zitmp = x(i,2);
|
||||
|
||||
for (int jn = 0; jn < d_numneigh_half[i]; jn++) {
|
||||
const int j = d_neighbors_half(i,jn);
|
||||
|
||||
const int eltj = d_map[type[j]];
|
||||
if (eltj < 0) continue;
|
||||
|
||||
// First compute screening function itself, sij
|
||||
const double xjtmp = x(j,0);
|
||||
const double yjtmp = x(j,1);
|
||||
const double zjtmp = x(j,2);
|
||||
const double delxij = xjtmp - xitmp;
|
||||
const double delyij = yjtmp - yitmp;
|
||||
const double delzij = zjtmp - zitmp;
|
||||
|
||||
const double rij2 = delxij * delxij + delyij * delyij + delzij * delzij;
|
||||
|
||||
if (rij2 > cutforcesq) {
|
||||
d_dscrfcn[offset+jn] = 0.0;
|
||||
d_scrfcn[offset+jn] = 0.0;
|
||||
d_fcpair[offset+jn] = 0.0;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Now compute derivatives
|
||||
const double rbound = ebound_meam[elti][eltj] * rij2;
|
||||
const double rij = sqrt(rij2);
|
||||
const double rnorm = (cutforce - rij) * drinv;
|
||||
double sij = 1.0;
|
||||
|
||||
// if rjk2 > ebound*rijsq, atom k is definitely outside the ellipse
|
||||
for (int kn = 0; kn < d_numneigh_full[i]; kn++) {
|
||||
int k = d_neighbors_full(i,kn);
|
||||
if (k == j) continue;
|
||||
int eltk = d_map[type[k]];
|
||||
if (eltk < 0) continue;
|
||||
|
||||
const double xktmp = x(k,0);
|
||||
const double yktmp = x(k,1);
|
||||
const double zktmp = x(k,2);
|
||||
|
||||
const double delxjk = xktmp - xjtmp;
|
||||
const double delyjk = yktmp - yjtmp;
|
||||
const double delzjk = zktmp - zjtmp;
|
||||
const double rjk2 = delxjk * delxjk + delyjk * delyjk + delzjk * delzjk;
|
||||
if (rjk2 > rbound) continue;
|
||||
|
||||
const double delxik = xktmp - xitmp;
|
||||
const double delyik = yktmp - yitmp;
|
||||
const double delzik = zktmp - zitmp;
|
||||
const double rik2 = delxik * delxik + delyik * delyik + delzik * delzik;
|
||||
if (rik2 > rbound) continue;
|
||||
|
||||
const double xik = rik2 / rij2;
|
||||
const double xjk = rjk2 / rij2;
|
||||
const double a = 1 - (xik - xjk) * (xik - xjk);
|
||||
// if a < 0, then ellipse equation doesn't describe this case and
|
||||
// atom k can't possibly screen i-j
|
||||
if (a <= 0.0) continue;
|
||||
|
||||
double cikj = (2.0 * (xik + xjk) + a - 2.0) / a;
|
||||
const double Cmax = Cmax_meam[elti][eltj][eltk];
|
||||
const double Cmin = Cmin_meam[elti][eltj][eltk];
|
||||
double sikj;
|
||||
if (cikj >= Cmax) continue;
|
||||
// note that cikj may be slightly negative (within numerical
|
||||
// tolerance) if atoms are colinear, so don't reject that case here
|
||||
// (other negative cikj cases were handled by the test on "a" above)
|
||||
else if (cikj <= Cmin) {
|
||||
sij = 0.0;
|
||||
break;
|
||||
} else {
|
||||
const double delc = Cmax - Cmin;
|
||||
cikj = (cikj - Cmin) / delc;
|
||||
sikj = fcut(cikj);
|
||||
}
|
||||
sij *= sikj;
|
||||
}
|
||||
|
||||
double dfc;
|
||||
const double fc = dfcut(rnorm, dfc);
|
||||
const double fcij = fc;
|
||||
const double dfcij = dfc * drinv;
|
||||
|
||||
// Now compute derivatives
|
||||
d_dscrfcn[offset+jn] = 0.0;
|
||||
const double sfcij = sij * fcij;
|
||||
if (!iszero_kk(sfcij) && !isone_kk(sfcij)) {
|
||||
for (int kn = 0; kn < d_numneigh_full[i]; kn++) {
|
||||
const int k = d_neighbors_full(i,kn);
|
||||
if (k == j) continue;
|
||||
const int eltk = d_map[type[k]];
|
||||
if (eltk < 0) continue;
|
||||
|
||||
const double delxjk = x(k,0) - xjtmp;
|
||||
const double delyjk = x(k,1) - yjtmp;
|
||||
const double delzjk = x(k,2) - zjtmp;
|
||||
const double rjk2 = delxjk * delxjk + delyjk * delyjk + delzjk * delzjk;
|
||||
if (rjk2 > rbound) continue;
|
||||
|
||||
const double delxik = x(k,0) - xitmp;
|
||||
const double delyik = x(k,1) - yitmp;
|
||||
const double delzik = x(k,2) - zitmp;
|
||||
const double rik2 = delxik * delxik + delyik * delyik + delzik * delzik;
|
||||
if (rik2 > rbound) continue;
|
||||
|
||||
const double xik = rik2 / rij2;
|
||||
const double xjk = rjk2 / rij2;
|
||||
const double a = 1 - (xik - xjk) * (xik - xjk);
|
||||
// if a < 0, then ellipse equation doesn't describe this case and
|
||||
// atom k can't possibly screen i-j
|
||||
if (a <= 0.0) continue;
|
||||
|
||||
double cikj = (2.0 * (xik + xjk) + a - 2.0) / a;
|
||||
const double Cmax = Cmax_meam[elti][eltj][eltk];
|
||||
const double Cmin = Cmin_meam[elti][eltj][eltk];
|
||||
if (cikj >= Cmax) {
|
||||
continue;
|
||||
// Note that cikj may be slightly negative (within numerical
|
||||
// tolerance) if atoms are colinear, so don't reject that case
|
||||
// here
|
||||
// (other negative cikj cases were handled by the test on "a"
|
||||
// above)
|
||||
// Note that we never have 0<cikj<Cmin here, else sij=0
|
||||
// (rejected above)
|
||||
} else {
|
||||
const double delc = Cmax - Cmin;
|
||||
cikj = (cikj - Cmin) / delc;
|
||||
double dfikj;
|
||||
const double sikj = dfcut(cikj, dfikj);
|
||||
const double coef1 = dfikj / (delc * sikj);
|
||||
const double dCikj = dCfunc(rij2, rik2, rjk2);
|
||||
d_dscrfcn[offset+jn] += coef1 * dCikj;
|
||||
}
|
||||
}
|
||||
const double coef1 = sfcij;
|
||||
const double coef2 = sij * dfcij / rij;
|
||||
d_dscrfcn[offset+jn] = d_dscrfcn[offset+jn] * coef1 - coef2;
|
||||
}
|
||||
|
||||
d_scrfcn[offset+jn] = sij;
|
||||
d_fcpair[offset+jn] = fcij;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void
|
||||
MEAMKokkos<DeviceType>::calc_rho1(int i, int /*ntype*/, typename AT::t_int_1d type, typename AT::t_int_1d d_map, typename AT::t_x_array x, typename AT::t_int_1d d_numneigh,
|
||||
int offset) const
|
||||
{
|
||||
// The rho0, etc. arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
|
||||
|
||||
auto v_rho0 = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_rho0),decltype(ndup_rho0)>::get(dup_rho0,ndup_rho0);
|
||||
auto a_rho0 = v_rho0.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
auto v_arho2b = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_arho2b),decltype(ndup_arho2b)>::get(dup_arho2b,ndup_arho2b);
|
||||
auto a_arho2b = v_arho2b.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
auto v_arho1 = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_arho1),decltype(ndup_arho1)>::get(dup_arho1,ndup_arho1);
|
||||
auto a_arho1 = v_arho1.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
auto v_arho2 = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_arho2),decltype(ndup_arho2)>::get(dup_arho2,ndup_arho2);
|
||||
auto a_arho2 = v_arho2.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
auto v_arho3 = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_arho3),decltype(ndup_arho3)>::get(dup_arho3,ndup_arho3);
|
||||
auto a_arho3 = v_arho3.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
auto v_arho3b = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_arho3b),decltype(ndup_arho3b)>::get(dup_arho3b,ndup_arho3b);
|
||||
auto a_arho3b = v_arho3b.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
auto v_t_ave = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_t_ave),decltype(ndup_t_ave)>::get(dup_t_ave,ndup_t_ave);
|
||||
auto a_t_ave = v_t_ave.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
auto v_tsq_ave = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_tsq_ave),decltype(ndup_tsq_ave)>::get(dup_tsq_ave,ndup_tsq_ave);
|
||||
auto a_tsq_ave = v_tsq_ave.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
|
||||
const int elti = d_map[type[i]];
|
||||
const double xtmp = x(i,0);
|
||||
const double ytmp = x(i,1);
|
||||
const double ztmp = x(i,2);
|
||||
for (int jn = 0; jn < d_numneigh[i]; jn++) {
|
||||
if (!iszero_kk(d_scrfcn[offset+jn])) {
|
||||
const int j = d_neighbors_half(i,jn);
|
||||
const double sij = d_scrfcn[offset+jn] * d_fcpair[offset+jn];
|
||||
double delij[3];
|
||||
delij[0] = x(j,0) - xtmp;
|
||||
delij[1] = x(j,1) - ytmp;
|
||||
delij[2] = x(j,2) - ztmp;
|
||||
const double rij2 = delij[0] * delij[0] + delij[1] * delij[1] + delij[2] * delij[2];
|
||||
if (rij2 < cutforcesq) {
|
||||
const int eltj = d_map[type[j]];
|
||||
const double rij = sqrt(rij2);
|
||||
const double ai = rij / re_meam[elti][elti] - 1.0;
|
||||
const double aj = rij / re_meam[eltj][eltj] - 1.0;
|
||||
const double ro0i = rho0_meam[elti];
|
||||
const double ro0j = rho0_meam[eltj];
|
||||
const double rhoa0j = ro0j * MathSpecialKokkos::fm_exp(-beta0_meam[eltj] * aj) * sij;
|
||||
double rhoa1j = ro0j * MathSpecialKokkos::fm_exp(-beta1_meam[eltj] * aj) * sij;
|
||||
double rhoa2j = ro0j * MathSpecialKokkos::fm_exp(-beta2_meam[eltj] * aj) * sij;
|
||||
double rhoa3j = ro0j * MathSpecialKokkos::fm_exp(-beta3_meam[eltj] * aj) * sij;
|
||||
const double rhoa0i = ro0i * MathSpecialKokkos::fm_exp(-beta0_meam[elti] * ai) * sij;
|
||||
double rhoa1i = ro0i * MathSpecialKokkos::fm_exp(-beta1_meam[elti] * ai) * sij;
|
||||
double rhoa2i = ro0i * MathSpecialKokkos::fm_exp(-beta2_meam[elti] * ai) * sij;
|
||||
double rhoa3i = ro0i * MathSpecialKokkos::fm_exp(-beta3_meam[elti] * ai) * sij;
|
||||
if (ialloy == 1) {
|
||||
rhoa1j *= t1_meam[eltj];
|
||||
rhoa2j *= t2_meam[eltj];
|
||||
rhoa3j *= t3_meam[eltj];
|
||||
rhoa1i *= t1_meam[elti];
|
||||
rhoa2i *= t2_meam[elti];
|
||||
rhoa3i *= t3_meam[elti];
|
||||
}
|
||||
a_rho0[i] += rhoa0j;
|
||||
a_rho0[j] += rhoa0i;
|
||||
// For ialloy = 2, use single-element value (not average)
|
||||
if (ialloy != 2) {
|
||||
a_t_ave(i,0) += t1_meam[eltj] * rhoa0j;
|
||||
a_t_ave(i,1) += t2_meam[eltj] * rhoa0j;
|
||||
a_t_ave(i,2) += t3_meam[eltj] * rhoa0j;
|
||||
a_t_ave(j,0) += t1_meam[elti] * rhoa0i;
|
||||
a_t_ave(j,1) += t2_meam[elti] * rhoa0i;
|
||||
a_t_ave(j,2) += t3_meam[elti] * rhoa0i;
|
||||
}
|
||||
if (ialloy == 1) {
|
||||
a_tsq_ave(i,0) += t1_meam[eltj] * t1_meam[eltj] * rhoa0j;
|
||||
a_tsq_ave(i,1) += t2_meam[eltj] * t2_meam[eltj] * rhoa0j;
|
||||
a_tsq_ave(i,2) += t3_meam[eltj] * t3_meam[eltj] * rhoa0j;
|
||||
a_tsq_ave(j,0) += t1_meam[elti] * t1_meam[elti] * rhoa0i;
|
||||
a_tsq_ave(j,1) += t2_meam[elti] * t2_meam[elti] * rhoa0i;
|
||||
a_tsq_ave(j,2) += t3_meam[elti] * t3_meam[elti] * rhoa0i;
|
||||
}
|
||||
a_arho2b[i] += rhoa2j;
|
||||
a_arho2b[j] += rhoa2i;
|
||||
|
||||
const double A1j = rhoa1j / rij;
|
||||
const double A2j = rhoa2j / rij2;
|
||||
const double A3j = rhoa3j / (rij2 * rij);
|
||||
const double A1i = rhoa1i / rij;
|
||||
const double A2i = rhoa2i / rij2;
|
||||
const double A3i = rhoa3i / (rij2 * rij);
|
||||
int nv2 = 0;
|
||||
int nv3 = 0;
|
||||
for (int m = 0; m < 3; m++) {
|
||||
a_arho1(i,m) += A1j * delij[m];
|
||||
a_arho1(j,m) += -A1i * delij[m];
|
||||
a_arho3b(i,m) += rhoa3j * delij[m] / rij;
|
||||
a_arho3b(j,m) += -rhoa3i * delij[m] / rij;
|
||||
for (int n = m; n < 3; n++) {
|
||||
a_arho2(i,nv2) += A2j * delij[m] * delij[n];
|
||||
a_arho2(j,nv2) += A2i * delij[m] * delij[n];
|
||||
nv2++;
|
||||
for (int p = n; p < 3; p++) {
|
||||
a_arho3(i,nv3) += A3j * delij[m] * delij[n] * delij[p];
|
||||
a_arho3(j,nv3) += -A3i * delij[m] * delij[n] * delij[p];
|
||||
nv3++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
//Cutoff function and derivative
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double MEAMKokkos<DeviceType>::dfcut(const double xi, double& dfc) const
|
||||
{
|
||||
if (xi >= 1.0) {
|
||||
dfc = 0.0;
|
||||
return 1.0;
|
||||
} else if (xi <= 0.0) {
|
||||
dfc = 0.0;
|
||||
return 0.0;
|
||||
} else {
|
||||
const double a = 1.0 - xi;
|
||||
const double a3 = a * a * a;
|
||||
const double a4 = a * a3;
|
||||
const double a1m4 = 1.0 - a4;
|
||||
|
||||
dfc = 8 * a1m4 * a3;
|
||||
return a1m4*a1m4;
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Derivative of Cikj w.r.t. rij
|
||||
// Inputs: rij,rij2,rik2,rjk2
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double MEAMKokkos<DeviceType>::dCfunc(const double rij2, const double rik2, const double rjk2) const
|
||||
{
|
||||
const double rij4 = rij2 * rij2;
|
||||
const double a = rik2 - rjk2;
|
||||
const double b = rik2 + rjk2;
|
||||
const double asq = a*a;
|
||||
double denom = rij4 - asq;
|
||||
denom = denom * denom;
|
||||
return -4 * (-2 * rij2 * asq + rij4 * b + asq * b) / denom;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void MEAMKokkos<DeviceType>::dCfunc2(const double rij2, const double rik2, const double rjk2, double& dCikj1, double& dCikj2) const
|
||||
{
|
||||
const double rij4 = rij2 * rij2;
|
||||
const double rik4 = rik2 * rik2;
|
||||
const double rjk4 = rjk2 * rjk2;
|
||||
const double a = rik2 - rjk2;
|
||||
double denom = rij4 - a * a;
|
||||
denom = denom * denom;
|
||||
dCikj1 = 4 * rij2 * (rij4 + rik4 + 2 * rik2 * rjk2 - 3 * rjk4 - 2 * rij2 * a) / denom;
|
||||
dCikj2 = 4 * rij2 * (rij4 - 3 * rik4 + 2 * rik2 * rjk2 + rjk4 + 2 * rij2 * a) / denom;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double MEAMKokkos<DeviceType>::fcut(const double xi) const
|
||||
{
|
||||
double a;
|
||||
if (xi >= 1.0)
|
||||
return 1.0;
|
||||
else if (xi <= 0.0)
|
||||
return 0.0;
|
||||
else {
|
||||
// ( 1.d0 - (1.d0 - xi)**4 )**2, but with better codegen
|
||||
a = 1.0 - xi;
|
||||
a *= a; a *= a;
|
||||
a = 1.0 - a;
|
||||
return a * a;
|
||||
}
|
||||
}
|
||||
|
||||
613
src/KOKKOS/meam_force_kokkos.h
Normal file
613
src/KOKKOS/meam_force_kokkos.h
Normal file
@ -0,0 +1,613 @@
|
||||
#include "math_special_kokkos.h"
|
||||
#include "meam_kokkos.h"
|
||||
#include <algorithm>
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathSpecialKokkos;
|
||||
|
||||
template <class DeviceType>
|
||||
void MEAMKokkos<DeviceType>::meam_force(
|
||||
int inum_half, int eflag_global, int eflag_atom, int vflag_global, int vflag_atom,
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d eatom, int ntype, typename AT::t_int_1d type,
|
||||
typename AT::t_int_1d d_map, typename AT::t_x_array x, typename AT::t_int_1d numneigh,
|
||||
typename AT::t_int_1d numneigh_full, typename AT::t_f_array f,
|
||||
typename ArrayTypes<DeviceType>::t_virial_array vatom, typename AT::t_int_1d d_ilist_half,
|
||||
typename AT::t_int_1d d_offset, typename AT::t_neighbors_2d d_neighbors_half,
|
||||
typename AT::t_neighbors_2d d_neighbors_full, int neighflag, int need_dup, EV_FLOAT &ev_all)
|
||||
{
|
||||
EV_FLOAT ev;
|
||||
|
||||
this->eflag_either = eflag_either;
|
||||
this->eflag_global = eflag_global;
|
||||
this->eflag_atom = eflag_atom;
|
||||
this->vflag_global = vflag_global;
|
||||
this->vflag_atom = vflag_atom;
|
||||
eflag_either = eflag_atom || eflag_global;
|
||||
vflag_either = vflag_atom || vflag_global;
|
||||
this->d_eatom = eatom;
|
||||
this->ntype = ntype;
|
||||
this->type = type;
|
||||
this->d_map = d_map;
|
||||
this->x = x;
|
||||
this->d_numneigh_half = numneigh;
|
||||
this->d_numneigh_full = numneigh_full;
|
||||
this->d_neighbors_half = d_neighbors_half;
|
||||
this->d_neighbors_full = d_neighbors_full;
|
||||
this->f = f;
|
||||
this->d_vatom = vatom;
|
||||
this->d_ilist_half = d_ilist_half;
|
||||
this->d_offset = d_offset;
|
||||
|
||||
if (need_dup) {
|
||||
dup_f = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum,
|
||||
Kokkos::Experimental::ScatterDuplicated>(f);
|
||||
if (eflag_atom)
|
||||
dup_eatom = Kokkos::Experimental::create_scatter_view<
|
||||
Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_eatom);
|
||||
if (vflag_atom)
|
||||
dup_vatom = Kokkos::Experimental::create_scatter_view<
|
||||
Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_vatom);
|
||||
} else {
|
||||
ndup_f =
|
||||
Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum,
|
||||
Kokkos::Experimental::ScatterNonDuplicated>(f);
|
||||
if (eflag_atom)
|
||||
ndup_eatom = Kokkos::Experimental::create_scatter_view<
|
||||
Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_eatom);
|
||||
if (vflag_atom)
|
||||
ndup_vatom = Kokkos::Experimental::create_scatter_view<
|
||||
Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_vatom);
|
||||
}
|
||||
|
||||
copymode = 1;
|
||||
if (neighflag == HALF)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagMEAMForce<HALF>>(0, inum_half),
|
||||
*this, ev);
|
||||
else if (neighflag == HALFTHREAD)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagMEAMForce<HALFTHREAD>>(0, inum_half),
|
||||
*this, ev);
|
||||
ev_all += ev;
|
||||
copymode = 0;
|
||||
|
||||
if (need_dup) {
|
||||
Kokkos::Experimental::contribute(f, dup_f);
|
||||
if (eflag_atom) Kokkos::Experimental::contribute(d_eatom, dup_eatom);
|
||||
if (vflag_atom) Kokkos::Experimental::contribute(d_vatom, dup_vatom);
|
||||
|
||||
// free duplicated memory
|
||||
dup_f = decltype(dup_f)();
|
||||
if (eflag_atom) dup_eatom = decltype(dup_eatom)();
|
||||
if (vflag_atom) dup_vatom = decltype(dup_vatom)();
|
||||
}
|
||||
}
|
||||
|
||||
template <class DeviceType>
|
||||
template <int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION void MEAMKokkos<DeviceType>::operator()(TagMEAMForce<NEIGHFLAG>,
|
||||
const int &ii, EV_FLOAT &ev) const
|
||||
{
|
||||
int i, j, jn, k, kn, kk, m, n, p, q;
|
||||
int nv2, nv3, elti, eltj, eltk, ind;
|
||||
X_FLOAT xitmp, yitmp, zitmp, delij[3];
|
||||
double rij2, rij, rij3;
|
||||
double v[6], fi[3], fj[3];
|
||||
double third, sixth;
|
||||
double pp, dUdrij, dUdsij, dUdrijm[3], force, forcem;
|
||||
double recip, phi, phip;
|
||||
double sij;
|
||||
double a1, a1i, a1j, a2, a2i, a2j;
|
||||
double a3i, a3j;
|
||||
double shpi[3], shpj[3];
|
||||
double ai, aj, ro0i, ro0j, invrei, invrej;
|
||||
double rhoa0j, drhoa0j, rhoa0i, drhoa0i;
|
||||
double rhoa1j, drhoa1j, rhoa1i, drhoa1i;
|
||||
double rhoa2j, drhoa2j, rhoa2i, drhoa2i;
|
||||
double a3, a3a, rhoa3j, drhoa3j, rhoa3i, drhoa3i;
|
||||
double drho0dr1, drho0dr2, drho0ds1, drho0ds2;
|
||||
double drho1dr1, drho1dr2, drho1ds1, drho1ds2;
|
||||
double drho1drm1[3], drho1drm2[3];
|
||||
double drho2dr1, drho2dr2, drho2ds1, drho2ds2;
|
||||
double drho2drm1[3], drho2drm2[3];
|
||||
double drho3dr1, drho3dr2, drho3ds1, drho3ds2;
|
||||
double drho3drm1[3], drho3drm2[3];
|
||||
double dt1dr1, dt1dr2, dt1ds1, dt1ds2;
|
||||
double dt2dr1, dt2dr2, dt2ds1, dt2ds2;
|
||||
double dt3dr1, dt3dr2, dt3ds1, dt3ds2;
|
||||
double drhodr1, drhodr2, drhods1, drhods2, drhodrm1[3], drhodrm2[3];
|
||||
double arg;
|
||||
double arg1i1, arg1j1, arg1i2, arg1j2, arg1i3, arg1j3, arg3i3, arg3j3;
|
||||
double dsij1, dsij2, force1, force2;
|
||||
double t1i, t2i, t3i, t1j, t2j, t3j;
|
||||
int fnoffset;
|
||||
|
||||
// The f, etc. arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
|
||||
|
||||
auto v_f =
|
||||
ScatterViewHelper<NeedDup_v<NEIGHFLAG, DeviceType>, decltype(dup_f), decltype(ndup_f)>::get(
|
||||
dup_f, ndup_f);
|
||||
auto a_f = v_f.template access<AtomicDup_v<NEIGHFLAG, DeviceType>>();
|
||||
auto v_eatom = ScatterViewHelper<NeedDup_v<NEIGHFLAG, DeviceType>, decltype(dup_eatom),
|
||||
decltype(ndup_eatom)>::get(dup_eatom, ndup_eatom);
|
||||
auto a_eatom = v_eatom.template access<AtomicDup_v<NEIGHFLAG, DeviceType>>();
|
||||
auto v_vatom = ScatterViewHelper<NeedDup_v<NEIGHFLAG, DeviceType>, decltype(dup_vatom),
|
||||
decltype(ndup_vatom)>::get(dup_vatom, ndup_vatom);
|
||||
auto a_vatom = v_vatom.template access<AtomicDup_v<NEIGHFLAG, DeviceType>>();
|
||||
|
||||
i = d_ilist_half[ii];
|
||||
fnoffset = d_offset[i];
|
||||
third = 1.0 / 3.0;
|
||||
sixth = 1.0 / 6.0;
|
||||
|
||||
elti = d_map[type[i]];
|
||||
if (elti < 0) return;
|
||||
|
||||
xitmp = x(i, 0);
|
||||
yitmp = x(i, 1);
|
||||
zitmp = x(i, 2);
|
||||
|
||||
// Treat each pair
|
||||
for (jn = 0; jn < d_numneigh_half[i]; jn++) {
|
||||
j = d_neighbors_half(i, jn);
|
||||
eltj = d_map[type[j]];
|
||||
|
||||
if (!iszero_kk(d_scrfcn[fnoffset + jn]) && eltj >= 0) {
|
||||
|
||||
sij = d_scrfcn[fnoffset + jn] * d_fcpair[fnoffset + jn];
|
||||
delij[0] = x(j, 0) - xitmp;
|
||||
delij[1] = x(j, 1) - yitmp;
|
||||
delij[2] = x(j, 2) - zitmp;
|
||||
rij2 = delij[0] * delij[0] + delij[1] * delij[1] + delij[2] * delij[2];
|
||||
if (rij2 < cutforcesq) {
|
||||
rij = sqrt(rij2);
|
||||
recip = 1.0 / rij;
|
||||
|
||||
// Compute phi and phip
|
||||
ind = eltind[elti][eltj];
|
||||
pp = rij * rdrar;
|
||||
kk = (int) pp;
|
||||
kk = (kk <= (nrar - 2)) ? kk : nrar - 2;
|
||||
pp = pp - kk;
|
||||
pp = (pp <= 1.0) ? pp : 1.0;
|
||||
phi = ((d_phirar3(ind, kk) * pp + d_phirar2(ind, kk)) * pp + d_phirar1(ind, kk)) * pp +
|
||||
d_phirar(ind, kk);
|
||||
phip = (d_phirar6(ind, kk) * pp + d_phirar5(ind, kk)) * pp + d_phirar4(ind, kk);
|
||||
|
||||
if (eflag_either) {
|
||||
double scaleij = d_scale(type[i], type[i]);
|
||||
double phi_sc = phi * scaleij;
|
||||
if (eflag_global) ev.evdwl += phi_sc * sij;
|
||||
if (eflag_atom) {
|
||||
a_eatom[i] += 0.5 * phi * sij;
|
||||
a_eatom[j] += 0.5 * phi * sij;
|
||||
}
|
||||
}
|
||||
|
||||
// write(1,*) "force_meamf: phi: ",phi
|
||||
// write(1,*) "force_meamf: phip: ",phip
|
||||
|
||||
// Compute pair densities and derivatives
|
||||
invrei = 1.0 / re_meam[elti][elti];
|
||||
ai = rij * invrei - 1.0;
|
||||
ro0i = rho0_meam[elti];
|
||||
rhoa0i = ro0i * MathSpecialKokkos::fm_exp(-beta0_meam[elti] * ai);
|
||||
drhoa0i = -beta0_meam[elti] * invrei * rhoa0i;
|
||||
rhoa1i = ro0i * MathSpecialKokkos::fm_exp(-beta1_meam[elti] * ai);
|
||||
drhoa1i = -beta1_meam[elti] * invrei * rhoa1i;
|
||||
rhoa2i = ro0i * MathSpecialKokkos::fm_exp(-beta2_meam[elti] * ai);
|
||||
drhoa2i = -beta2_meam[elti] * invrei * rhoa2i;
|
||||
rhoa3i = ro0i * MathSpecialKokkos::fm_exp(-beta3_meam[elti] * ai);
|
||||
drhoa3i = -beta3_meam[elti] * invrei * rhoa3i;
|
||||
|
||||
if (elti != eltj) {
|
||||
invrej = 1.0 / re_meam[eltj][eltj];
|
||||
aj = rij * invrej - 1.0;
|
||||
ro0j = rho0_meam[eltj];
|
||||
rhoa0j = ro0j * MathSpecialKokkos::fm_exp(-beta0_meam[eltj] * aj);
|
||||
drhoa0j = -beta0_meam[eltj] * invrej * rhoa0j;
|
||||
rhoa1j = ro0j * MathSpecialKokkos::fm_exp(-beta1_meam[eltj] * aj);
|
||||
drhoa1j = -beta1_meam[eltj] * invrej * rhoa1j;
|
||||
rhoa2j = ro0j * MathSpecialKokkos::fm_exp(-beta2_meam[eltj] * aj);
|
||||
drhoa2j = -beta2_meam[eltj] * invrej * rhoa2j;
|
||||
rhoa3j = ro0j * MathSpecialKokkos::fm_exp(-beta3_meam[eltj] * aj);
|
||||
drhoa3j = -beta3_meam[eltj] * invrej * rhoa3j;
|
||||
} else {
|
||||
rhoa0j = rhoa0i;
|
||||
drhoa0j = drhoa0i;
|
||||
rhoa1j = rhoa1i;
|
||||
drhoa1j = drhoa1i;
|
||||
rhoa2j = rhoa2i;
|
||||
drhoa2j = drhoa2i;
|
||||
rhoa3j = rhoa3i;
|
||||
drhoa3j = drhoa3i;
|
||||
}
|
||||
|
||||
const double t1mi = t1_meam[elti];
|
||||
const double t2mi = t2_meam[elti];
|
||||
const double t3mi = t3_meam[elti];
|
||||
const double t1mj = t1_meam[eltj];
|
||||
const double t2mj = t2_meam[eltj];
|
||||
const double t3mj = t3_meam[eltj];
|
||||
|
||||
if (ialloy == 1) {
|
||||
rhoa1j *= t1mj;
|
||||
rhoa2j *= t2mj;
|
||||
rhoa3j *= t3mj;
|
||||
rhoa1i *= t1mi;
|
||||
rhoa2i *= t2mi;
|
||||
rhoa3i *= t3mi;
|
||||
drhoa1j *= t1mj;
|
||||
drhoa2j *= t2mj;
|
||||
drhoa3j *= t3mj;
|
||||
drhoa1i *= t1mi;
|
||||
drhoa2i *= t2mi;
|
||||
drhoa3i *= t3mi;
|
||||
}
|
||||
|
||||
nv2 = 0;
|
||||
nv3 = 0;
|
||||
arg1i1 = 0.0;
|
||||
arg1j1 = 0.0;
|
||||
arg1i2 = 0.0;
|
||||
arg1j2 = 0.0;
|
||||
arg1i3 = 0.0;
|
||||
arg1j3 = 0.0;
|
||||
arg3i3 = 0.0;
|
||||
arg3j3 = 0.0;
|
||||
for (n = 0; n < 3; n++) {
|
||||
for (p = n; p < 3; p++) {
|
||||
for (q = p; q < 3; q++) {
|
||||
arg = delij[n] * delij[p] * delij[q] * v3D[nv3];
|
||||
arg1i3 = arg1i3 + d_arho3(i, nv3) * arg;
|
||||
arg1j3 = arg1j3 - d_arho3(j, nv3) * arg;
|
||||
nv3 = nv3 + 1;
|
||||
}
|
||||
arg = delij[n] * delij[p] * v2D[nv2];
|
||||
arg1i2 = arg1i2 + d_arho2(i, nv2) * arg;
|
||||
arg1j2 = arg1j2 + d_arho2(j, nv2) * arg;
|
||||
nv2 = nv2 + 1;
|
||||
}
|
||||
arg1i1 = arg1i1 + d_arho1(i, n) * delij[n];
|
||||
arg1j1 = arg1j1 - d_arho1(j, n) * delij[n];
|
||||
arg3i3 = arg3i3 + d_arho3b(i, n) * delij[n];
|
||||
arg3j3 = arg3j3 - d_arho3b(j, n) * delij[n];
|
||||
}
|
||||
|
||||
// rho0 terms
|
||||
drho0dr1 = drhoa0j * sij;
|
||||
drho0dr2 = drhoa0i * sij;
|
||||
|
||||
// rho1 terms
|
||||
a1 = 2 * sij / rij;
|
||||
drho1dr1 = a1 * (drhoa1j - rhoa1j / rij) * arg1i1;
|
||||
drho1dr2 = a1 * (drhoa1i - rhoa1i / rij) * arg1j1;
|
||||
a1 = 2.0 * sij / rij;
|
||||
for (m = 0; m < 3; m++) {
|
||||
drho1drm1[m] = a1 * rhoa1j * d_arho1(i, m);
|
||||
drho1drm2[m] = -a1 * rhoa1i * d_arho1(j, m);
|
||||
}
|
||||
|
||||
// rho2 terms
|
||||
a2 = 2 * sij / rij2;
|
||||
drho2dr1 =
|
||||
a2 * (drhoa2j - 2 * rhoa2j / rij) * arg1i2 - 2.0 / 3.0 * d_arho2b[i] * drhoa2j * sij;
|
||||
drho2dr2 =
|
||||
a2 * (drhoa2i - 2 * rhoa2i / rij) * arg1j2 - 2.0 / 3.0 * d_arho2b[j] * drhoa2i * sij;
|
||||
a2 = 4 * sij / rij2;
|
||||
for (m = 0; m < 3; m++) {
|
||||
drho2drm1[m] = 0.0;
|
||||
drho2drm2[m] = 0.0;
|
||||
for (n = 0; n < 3; n++) {
|
||||
drho2drm1[m] = drho2drm1[m] + d_arho2(i, vind2D[m][n]) * delij[n];
|
||||
drho2drm2[m] = drho2drm2[m] - d_arho2(j, vind2D[m][n]) * delij[n];
|
||||
}
|
||||
drho2drm1[m] = a2 * rhoa2j * drho2drm1[m];
|
||||
drho2drm2[m] = -a2 * rhoa2i * drho2drm2[m];
|
||||
}
|
||||
|
||||
// rho3 terms
|
||||
rij3 = rij * rij2;
|
||||
a3 = 2 * sij / rij3;
|
||||
a3a = 6.0 / 5.0 * sij / rij;
|
||||
drho3dr1 =
|
||||
a3 * (drhoa3j - 3 * rhoa3j / rij) * arg1i3 - a3a * (drhoa3j - rhoa3j / rij) * arg3i3;
|
||||
drho3dr2 =
|
||||
a3 * (drhoa3i - 3 * rhoa3i / rij) * arg1j3 - a3a * (drhoa3i - rhoa3i / rij) * arg3j3;
|
||||
a3 = 6 * sij / rij3;
|
||||
a3a = 6 * sij / (5 * rij);
|
||||
for (m = 0; m < 3; m++) {
|
||||
drho3drm1[m] = 0.0;
|
||||
drho3drm2[m] = 0.0;
|
||||
nv2 = 0;
|
||||
for (n = 0; n < 3; n++) {
|
||||
for (p = n; p < 3; p++) {
|
||||
arg = delij[n] * delij[p] * v2D[nv2];
|
||||
drho3drm1[m] = drho3drm1[m] + d_arho3(i, vind3D[m][n][p]) * arg;
|
||||
drho3drm2[m] = drho3drm2[m] + d_arho3(j, vind3D[m][n][p]) * arg;
|
||||
nv2 = nv2 + 1;
|
||||
}
|
||||
}
|
||||
drho3drm1[m] = (a3 * drho3drm1[m] - a3a * d_arho3b(i, m)) * rhoa3j;
|
||||
drho3drm2[m] = (-a3 * drho3drm2[m] + a3a * d_arho3b(j, m)) * rhoa3i;
|
||||
}
|
||||
|
||||
// Compute derivatives of weighting functions t wrt rij
|
||||
t1i = d_t_ave(i, 0);
|
||||
t2i = d_t_ave(i, 1);
|
||||
t3i = d_t_ave(i, 2);
|
||||
t1j = d_t_ave(j, 0);
|
||||
t2j = d_t_ave(j, 1);
|
||||
t3j = d_t_ave(j, 2);
|
||||
|
||||
if (ialloy == 1) {
|
||||
|
||||
a1i = fdiv_zero_kk(drhoa0j * sij, d_tsq_ave(i, 0));
|
||||
a1j = fdiv_zero_kk(drhoa0i * sij, d_tsq_ave(j, 0));
|
||||
a2i = fdiv_zero_kk(drhoa0j * sij, d_tsq_ave(i, 1));
|
||||
a2j = fdiv_zero_kk(drhoa0i * sij, d_tsq_ave(j, 1));
|
||||
a3i = fdiv_zero_kk(drhoa0j * sij, d_tsq_ave(i, 2));
|
||||
a3j = fdiv_zero_kk(drhoa0i * sij, d_tsq_ave(j, 2));
|
||||
|
||||
dt1dr1 = a1i * (t1mj - t1i * MathSpecialKokkos::square(t1mj));
|
||||
dt1dr2 = a1j * (t1mi - t1j * MathSpecialKokkos::square(t1mi));
|
||||
dt2dr1 = a2i * (t2mj - t2i * MathSpecialKokkos::square(t2mj));
|
||||
dt2dr2 = a2j * (t2mi - t2j * MathSpecialKokkos::square(t2mi));
|
||||
dt3dr1 = a3i * (t3mj - t3i * MathSpecialKokkos::square(t3mj));
|
||||
dt3dr2 = a3j * (t3mi - t3j * MathSpecialKokkos::square(t3mi));
|
||||
|
||||
} else if (ialloy == 2) {
|
||||
|
||||
dt1dr1 = 0.0;
|
||||
dt1dr2 = 0.0;
|
||||
dt2dr1 = 0.0;
|
||||
dt2dr2 = 0.0;
|
||||
dt3dr1 = 0.0;
|
||||
dt3dr2 = 0.0;
|
||||
|
||||
} else {
|
||||
|
||||
ai = 0.0;
|
||||
if (!iszero_kk(d_rho0[i])) ai = drhoa0j * sij / d_rho0[i];
|
||||
aj = 0.0;
|
||||
if (!iszero_kk(d_rho0[j])) aj = drhoa0i * sij / d_rho0[j];
|
||||
|
||||
dt1dr1 = ai * (t1mj - t1i);
|
||||
dt1dr2 = aj * (t1mi - t1j);
|
||||
dt2dr1 = ai * (t2mj - t2i);
|
||||
dt2dr2 = aj * (t2mi - t2j);
|
||||
dt3dr1 = ai * (t3mj - t3i);
|
||||
dt3dr2 = aj * (t3mi - t3j);
|
||||
}
|
||||
|
||||
// Compute derivatives of total density wrt rij, sij and rij(3)
|
||||
get_shpfcn(lattce_meam[elti][elti], stheta_meam[elti][elti], ctheta_meam[elti][elti], shpi);
|
||||
get_shpfcn(lattce_meam[eltj][eltj], stheta_meam[elti][elti], ctheta_meam[elti][elti], shpj);
|
||||
|
||||
drhodr1 = d_dgamma1[i] * drho0dr1 +
|
||||
d_dgamma2[i] *
|
||||
(dt1dr1 * d_rho1[i] + t1i * drho1dr1 + dt2dr1 * d_rho2[i] + t2i * drho2dr1 +
|
||||
dt3dr1 * d_rho3[i] + t3i * drho3dr1) -
|
||||
d_dgamma3[i] * (shpi[0] * dt1dr1 + shpi[1] * dt2dr1 + shpi[2] * dt3dr1);
|
||||
drhodr2 = d_dgamma1[j] * drho0dr2 +
|
||||
d_dgamma2[j] *
|
||||
(dt1dr2 * d_rho1[j] + t1j * drho1dr2 + dt2dr2 * d_rho2[j] + t2j * drho2dr2 +
|
||||
dt3dr2 * d_rho3[j] + t3j * drho3dr2) -
|
||||
d_dgamma3[j] * (shpj[0] * dt1dr2 + shpj[1] * dt2dr2 + shpj[2] * dt3dr2);
|
||||
for (m = 0; m < 3; m++) {
|
||||
drhodrm1[m] = 0.0;
|
||||
drhodrm2[m] = 0.0;
|
||||
drhodrm1[m] =
|
||||
d_dgamma2[i] * (t1i * drho1drm1[m] + t2i * drho2drm1[m] + t3i * drho3drm1[m]);
|
||||
drhodrm2[m] =
|
||||
d_dgamma2[j] * (t1j * drho1drm2[m] + t2j * drho2drm2[m] + t3j * drho3drm2[m]);
|
||||
}
|
||||
|
||||
// Compute derivatives wrt sij, but only if necessary
|
||||
if (!iszero_kk(d_dscrfcn[fnoffset + jn])) {
|
||||
drho0ds1 = rhoa0j;
|
||||
drho0ds2 = rhoa0i;
|
||||
a1 = 2.0 / rij;
|
||||
drho1ds1 = a1 * rhoa1j * arg1i1;
|
||||
drho1ds2 = a1 * rhoa1i * arg1j1;
|
||||
a2 = 2.0 / rij2;
|
||||
drho2ds1 = a2 * rhoa2j * arg1i2 - 2.0 / 3.0 * d_arho2b[i] * rhoa2j;
|
||||
drho2ds2 = a2 * rhoa2i * arg1j2 - 2.0 / 3.0 * d_arho2b[j] * rhoa2i;
|
||||
a3 = 2.0 / rij3;
|
||||
a3a = 6.0 / (5.0 * rij);
|
||||
drho3ds1 = a3 * rhoa3j * arg1i3 - a3a * rhoa3j * arg3i3;
|
||||
drho3ds2 = a3 * rhoa3i * arg1j3 - a3a * rhoa3i * arg3j3;
|
||||
|
||||
if (ialloy == 1) {
|
||||
a1i = fdiv_zero_kk(rhoa0j, d_tsq_ave(i, 0));
|
||||
a1j = fdiv_zero_kk(rhoa0i, d_tsq_ave(j, 0));
|
||||
a2i = fdiv_zero_kk(rhoa0j, d_tsq_ave(i, 1));
|
||||
a2j = fdiv_zero_kk(rhoa0i, d_tsq_ave(j, 1));
|
||||
a3i = fdiv_zero_kk(rhoa0j, d_tsq_ave(i, 2));
|
||||
a3j = fdiv_zero_kk(rhoa0i, d_tsq_ave(j, 2));
|
||||
|
||||
dt1ds1 = a1i * (t1mj - t1i * MathSpecialKokkos::square(t1mj));
|
||||
dt1ds2 = a1j * (t1mi - t1j * MathSpecialKokkos::square(t1mi));
|
||||
dt2ds1 = a2i * (t2mj - t2i * MathSpecialKokkos::square(t2mj));
|
||||
dt2ds2 = a2j * (t2mi - t2j * MathSpecialKokkos::square(t2mi));
|
||||
dt3ds1 = a3i * (t3mj - t3i * MathSpecialKokkos::square(t3mj));
|
||||
dt3ds2 = a3j * (t3mi - t3j * MathSpecialKokkos::square(t3mi));
|
||||
|
||||
} else if (ialloy == 2) {
|
||||
|
||||
dt1ds1 = 0.0;
|
||||
dt1ds2 = 0.0;
|
||||
dt2ds1 = 0.0;
|
||||
dt2ds2 = 0.0;
|
||||
dt3ds1 = 0.0;
|
||||
dt3ds2 = 0.0;
|
||||
|
||||
} else {
|
||||
|
||||
ai = 0.0;
|
||||
if (!iszero_kk(d_rho0[i])) ai = rhoa0j / d_rho0[i];
|
||||
aj = 0.0;
|
||||
if (!iszero_kk(d_rho0[j])) aj = rhoa0i / d_rho0[j];
|
||||
|
||||
dt1ds1 = ai * (t1mj - t1i);
|
||||
dt1ds2 = aj * (t1mi - t1j);
|
||||
dt2ds1 = ai * (t2mj - t2i);
|
||||
dt2ds2 = aj * (t2mi - t2j);
|
||||
dt3ds1 = ai * (t3mj - t3i);
|
||||
dt3ds2 = aj * (t3mi - t3j);
|
||||
}
|
||||
|
||||
drhods1 = d_dgamma1[i] * drho0ds1 +
|
||||
d_dgamma2[i] *
|
||||
(dt1ds1 * d_rho1[i] + t1i * drho1ds1 + dt2ds1 * d_rho2[i] + t2i * drho2ds1 +
|
||||
dt3ds1 * d_rho3[i] + t3i * drho3ds1) -
|
||||
d_dgamma3[i] * (shpi[0] * dt1ds1 + shpi[1] * dt2ds1 + shpi[2] * dt3ds1);
|
||||
drhods2 = d_dgamma1[j] * drho0ds2 +
|
||||
d_dgamma2[j] *
|
||||
(dt1ds2 * d_rho1[j] + t1j * drho1ds2 + dt2ds2 * d_rho2[j] + t2j * drho2ds2 +
|
||||
dt3ds2 * d_rho3[j] + t3j * drho3ds2) -
|
||||
d_dgamma3[j] * (shpj[0] * dt1ds2 + shpj[1] * dt2ds2 + shpj[2] * dt3ds2);
|
||||
}
|
||||
|
||||
// Compute derivatives of energy wrt rij, sij and rij[3]
|
||||
dUdrij = phip * sij + d_frhop[i] * drhodr1 + d_frhop[j] * drhodr2;
|
||||
dUdsij = 0.0;
|
||||
if (!iszero_kk(d_dscrfcn[fnoffset + jn])) {
|
||||
dUdsij = phi + d_frhop[i] * drhods1 + d_frhop[j] * drhods2;
|
||||
}
|
||||
for (m = 0; m < 3; m++) {
|
||||
dUdrijm[m] = d_frhop[i] * drhodrm1[m] + d_frhop[j] * drhodrm2[m];
|
||||
}
|
||||
|
||||
// Add the part of the force due to dUdrij and dUdsij
|
||||
force = dUdrij * recip + dUdsij * d_dscrfcn[fnoffset + jn];
|
||||
for (m = 0; m < 3; m++) {
|
||||
forcem = delij[m] * force + dUdrijm[m];
|
||||
a_f(i, m) += forcem;
|
||||
a_f(j, m) -= forcem;
|
||||
}
|
||||
|
||||
// Tabulate per-atom virial as symmetrized stress tensor
|
||||
|
||||
if (vflag_either) {
|
||||
fi[0] = delij[0] * force + dUdrijm[0];
|
||||
fi[1] = delij[1] * force + dUdrijm[1];
|
||||
fi[2] = delij[2] * force + dUdrijm[2];
|
||||
v[0] = -0.5 * (delij[0] * fi[0]);
|
||||
v[1] = -0.5 * (delij[1] * fi[1]);
|
||||
v[2] = -0.5 * (delij[2] * fi[2]);
|
||||
v[3] = -0.25 * (delij[0] * fi[1] + delij[1] * fi[0]);
|
||||
v[4] = -0.25 * (delij[0] * fi[2] + delij[2] * fi[0]);
|
||||
v[5] = -0.25 * (delij[1] * fi[2] + delij[2] * fi[1]);
|
||||
|
||||
if (vflag_global)
|
||||
for (m = 0; m < 6; m++) ev.v[m] += 2.0 * v[m];
|
||||
|
||||
if (vflag_atom) {
|
||||
for (m = 0; m < 6; m++) {
|
||||
a_vatom(i, m) += v[m];
|
||||
a_vatom(j, m) += v[m];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now compute forces on other atoms k due to change in sij
|
||||
|
||||
if (iszero_kk(sij) || isone_kk(sij)) continue; //: cont jn loop
|
||||
|
||||
double dxik(0), dyik(0), dzik(0);
|
||||
double dxjk(0), dyjk(0), dzjk(0);
|
||||
|
||||
for (kn = 0; kn < d_numneigh_full[i]; kn++) {
|
||||
k = d_neighbors_full(i, kn);
|
||||
eltk = d_map[type[k]];
|
||||
if (k != j && eltk >= 0) {
|
||||
double xik, xjk, cikj, sikj, dfc, a;
|
||||
double dCikj1, dCikj2;
|
||||
double delc, rik2, rjk2;
|
||||
|
||||
sij = d_scrfcn[jn + fnoffset] * d_fcpair[jn + fnoffset];
|
||||
const double Cmax = Cmax_meam[elti][eltj][eltk];
|
||||
const double Cmin = Cmin_meam[elti][eltj][eltk];
|
||||
|
||||
dsij1 = 0.0;
|
||||
dsij2 = 0.0;
|
||||
if (!iszero_kk(sij) && !isone_kk(sij)) {
|
||||
const double rbound = rij2 * ebound_meam[elti][eltj];
|
||||
delc = Cmax - Cmin;
|
||||
dxjk = x(k, 0) - x(j, 0);
|
||||
dyjk = x(k, 1) - x(j, 1);
|
||||
dzjk = x(k, 2) - x(j, 2);
|
||||
rjk2 = dxjk * dxjk + dyjk * dyjk + dzjk * dzjk;
|
||||
if (rjk2 <= rbound) {
|
||||
dxik = x(k, 0) - x(i, 0);
|
||||
dyik = x(k, 1) - x(i, 1);
|
||||
dzik = x(k, 2) - x(i, 2);
|
||||
rik2 = dxik * dxik + dyik * dyik + dzik * dzik;
|
||||
if (rik2 <= rbound) {
|
||||
xik = rik2 / rij2;
|
||||
xjk = rjk2 / rij2;
|
||||
a = 1 - (xik - xjk) * (xik - xjk);
|
||||
if (!iszero_kk(a)) {
|
||||
cikj = (2.0 * (xik + xjk) + a - 2.0) / a;
|
||||
if (cikj >= Cmin && cikj <= Cmax) {
|
||||
cikj = (cikj - Cmin) / delc;
|
||||
sikj = dfcut(cikj, dfc);
|
||||
dCfunc2(rij2, rik2, rjk2, dCikj1, dCikj2);
|
||||
a = sij / delc * dfc / sikj;
|
||||
dsij1 = a * dCikj1;
|
||||
dsij2 = a * dCikj2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!iszero_kk(dsij1) || !iszero_kk(dsij2)) {
|
||||
force1 = dUdsij * dsij1;
|
||||
force2 = dUdsij * dsij2;
|
||||
|
||||
a_f(i, 0) += force1 * dxik;
|
||||
a_f(i, 1) += force1 * dyik;
|
||||
a_f(i, 2) += force1 * dzik;
|
||||
a_f(j, 0) += force2 * dxjk;
|
||||
a_f(j, 1) += force2 * dyjk;
|
||||
a_f(j, 2) += force2 * dzjk;
|
||||
a_f(k, 0) -= force1 * dxik + force2 * dxjk;
|
||||
a_f(k, 1) -= force1 * dyik + force2 * dyjk;
|
||||
a_f(k, 2) -= force1 * dzik + force2 * dzjk;
|
||||
|
||||
// Tabulate per-atom virial as symmetrized stress tensor
|
||||
|
||||
if (vflag_either) {
|
||||
fi[0] = force1 * dxik;
|
||||
fi[1] = force1 * dyik;
|
||||
fi[2] = force1 * dzik;
|
||||
fj[0] = force2 * dxjk;
|
||||
fj[1] = force2 * dyjk;
|
||||
fj[2] = force2 * dzjk;
|
||||
v[0] = -third * (dxik * fi[0] + dxjk * fj[0]);
|
||||
v[1] = -third * (dyik * fi[1] + dyjk * fj[1]);
|
||||
v[2] = -third * (dzik * fi[2] + dzjk * fj[2]);
|
||||
v[3] = -sixth * (dxik * fi[1] + dxjk * fj[1] + dyik * fi[0] + dyjk * fj[0]);
|
||||
v[4] = -sixth * (dxik * fi[2] + dxjk * fj[2] + dzik * fi[0] + dzjk * fj[0]);
|
||||
v[5] = -sixth * (dyik * fi[2] + dyjk * fj[2] + dzik * fi[1] + dzjk * fj[1]);
|
||||
|
||||
if (vflag_global)
|
||||
for (m = 0; m < 6; m++) ev.v[m] += 3.0 * v[m];
|
||||
|
||||
if (vflag_atom) {
|
||||
for (m = 0; m < 6; m++) {
|
||||
a_vatom(i, m) += v[m];
|
||||
a_vatom(j, m) += v[m];
|
||||
a_vatom(k, m) += v[m];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// end of k loop
|
||||
}
|
||||
}
|
||||
}
|
||||
// end of j loop
|
||||
}
|
||||
}
|
||||
289
src/KOKKOS/meam_funcs_kokkos.h
Normal file
289
src/KOKKOS/meam_funcs_kokkos.h
Normal file
@ -0,0 +1,289 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Naga Vydyanathan (NVIDIA)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "math_special_kokkos.h"
|
||||
#include <cmath>
|
||||
#include "meam_kokkos.h"
|
||||
using namespace MathSpecialKokkos;
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Compute G(gamma) based on selection flag ibar:
|
||||
// 0 => G = sqrt(1+gamma)
|
||||
// 1 => G = exp(gamma/2)
|
||||
// 2 => not implemented
|
||||
// 3 => G = 2/(1+exp(-gamma))
|
||||
// 4 => G = sqrt(1+gamma)
|
||||
// -5 => G = +-sqrt(abs(1+gamma))
|
||||
//
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double MEAMKokkos<DeviceType>::G_gam(const double gamma, const int ibar, int &errorflag) const
|
||||
{
|
||||
double gsmooth_switchpoint;
|
||||
|
||||
switch (ibar) {
|
||||
case 0:
|
||||
case 4:
|
||||
gsmooth_switchpoint = -gsmooth_factor / (gsmooth_factor + 1);
|
||||
if (gamma < gsmooth_switchpoint) {
|
||||
// e.g. gsmooth_factor is 99, {:
|
||||
// gsmooth_switchpoint = -0.99
|
||||
// G = 0.01*(-0.99/gamma)**99
|
||||
double G = 1 / (gsmooth_factor + 1) * pow((gsmooth_switchpoint / gamma), gsmooth_factor);
|
||||
return sqrt(G);
|
||||
} else {
|
||||
return sqrt(1.0 + gamma);
|
||||
}
|
||||
case 1:
|
||||
return MathSpecialKokkos::fm_exp(gamma / 2.0);
|
||||
case 3:
|
||||
return 2.0 / (1.0 + MathSpecialKokkos::fm_exp(-gamma));
|
||||
case -5:
|
||||
if ((1.0 + gamma) >= 0) {
|
||||
return sqrt(1.0 + gamma);
|
||||
} else {
|
||||
return -sqrt(-1.0 - gamma);
|
||||
}
|
||||
}
|
||||
errorflag = 1;
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Compute G(gamma and dG(gamma) based on selection flag ibar:
|
||||
// 0 => G = sqrt(1+gamma)
|
||||
// 1 => G = exp(gamma/2)
|
||||
// 2 => not implemented
|
||||
// 3 => G = 2/(1+exp(-gamma))
|
||||
// 4 => G = sqrt(1+gamma)
|
||||
// -5 => G = +-sqrt(abs(1+gamma))
|
||||
//
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double MEAMKokkos<DeviceType>::dG_gam(const double gamma, const int ibar, double& dG) const
|
||||
{
|
||||
double gsmooth_switchpoint;
|
||||
double G;
|
||||
|
||||
switch (ibar) {
|
||||
case 0:
|
||||
case 4:
|
||||
gsmooth_switchpoint = -gsmooth_factor / (gsmooth_factor + 1);
|
||||
if (gamma < gsmooth_switchpoint) {
|
||||
// e.g. gsmooth_factor is 99, {:
|
||||
// gsmooth_switchpoint = -0.99
|
||||
// G = 0.01*(-0.99/gamma)**99
|
||||
G = 1 / (gsmooth_factor + 1) * pow((gsmooth_switchpoint / gamma), gsmooth_factor);
|
||||
G = sqrt(G);
|
||||
dG = -gsmooth_factor * G / (2.0 * gamma);
|
||||
return G;
|
||||
} else {
|
||||
G = sqrt(1.0 + gamma);
|
||||
dG = 1.0 / (2.0 * G);
|
||||
return G;
|
||||
}
|
||||
case 1:
|
||||
G = MathSpecialKokkos::fm_exp(gamma / 2.0);
|
||||
dG = G / 2.0;
|
||||
return G;
|
||||
case 3:
|
||||
G = 2.0 / (1.0 + MathSpecialKokkos::fm_exp(-gamma));
|
||||
dG = G * (2.0 - G) / 2;
|
||||
return G;
|
||||
case -5:
|
||||
if ((1.0 + gamma) >= 0) {
|
||||
G = sqrt(1.0 + gamma);
|
||||
dG = 1.0 / (2.0 * G);
|
||||
return G;
|
||||
} else {
|
||||
G = -sqrt(-1.0 - gamma);
|
||||
dG = -1.0 / (2.0 * G);
|
||||
return G;
|
||||
}
|
||||
}
|
||||
dG = 1.0;
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Compute ZBL potential
|
||||
//
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double MEAMKokkos<DeviceType>::zbl(const double r, const int z1, const int z2) const
|
||||
{
|
||||
int i;
|
||||
const double c[] = { 0.028171, 0.28022, 0.50986, 0.18175 };
|
||||
const double d[] = { 0.20162, 0.40290, 0.94229, 3.1998 };
|
||||
const double azero = 0.4685;
|
||||
const double cc = 14.3997;
|
||||
double a, x;
|
||||
// azero = (9pi^2/128)^1/3 (0.529) Angstroms
|
||||
a = azero / (pow(z1, 0.23) + pow(z2, 0.23));
|
||||
double result = 0.0;
|
||||
x = r / a;
|
||||
for (i = 0; i <= 3; i++) {
|
||||
result = result + c[i] * MathSpecialKokkos::fm_exp(-d[i] * x);
|
||||
}
|
||||
if (r > 0.0)
|
||||
result = result * z1 * z2 / r * cc;
|
||||
return result;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Compute embedding function F(rhobar) and derivative F'(rhobar), eqn I.5
|
||||
//
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double MEAMKokkos<DeviceType>::embedding(const double A, const double Ec, const double rhobar, double& dF) const
|
||||
{
|
||||
const double AEc = A * Ec;
|
||||
|
||||
if (rhobar > 0.0) {
|
||||
const double lrb = log(rhobar);
|
||||
dF = AEc * (1.0 + lrb);
|
||||
return AEc * rhobar * lrb;
|
||||
} else {
|
||||
if (emb_lin_neg == 0) {
|
||||
dF = 0.0;
|
||||
return 0.0;
|
||||
} else {
|
||||
dF = - AEc;
|
||||
return - AEc * rhobar;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Compute Rose energy function, I.16
|
||||
//
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double MEAMKokkos<DeviceType>::erose(const double r, const double re, const double alpha, const double Ec, const double repuls,
|
||||
const double attrac, const int form) const
|
||||
{
|
||||
double astar, a3;
|
||||
double result = 0.0;
|
||||
|
||||
if (r > 0.0) {
|
||||
astar = alpha * (r / re - 1.0);
|
||||
a3 = 0.0;
|
||||
if (astar >= 0)
|
||||
a3 = attrac;
|
||||
else if (astar < 0)
|
||||
a3 = repuls;
|
||||
|
||||
if (form == 1)
|
||||
result = -Ec * (1 + astar + (-attrac + repuls / r) * MathSpecialKokkos::cube(astar)) * MathSpecialKokkos::fm_exp(-astar);
|
||||
else if (form == 2)
|
||||
result = -Ec * (1 + astar + a3 * MathSpecialKokkos::cube(astar)) * MathSpecialKokkos::fm_exp(-astar);
|
||||
else
|
||||
result = -Ec * (1 + astar + a3 * MathSpecialKokkos::cube(astar) / (r / re)) * MathSpecialKokkos::fm_exp(-astar);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Shape factors for various configurations
|
||||
//
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void MEAMKokkos<DeviceType>::get_shpfcn(const lattice_t latt, const double sthe, const double cthe, double (&s)[3]) const
|
||||
{
|
||||
switch (latt) {
|
||||
case FCC:
|
||||
case BCC:
|
||||
case B1:
|
||||
case B2:
|
||||
s[0] = 0.0;
|
||||
s[1] = 0.0;
|
||||
s[2] = 0.0;
|
||||
break;
|
||||
case HCP:
|
||||
s[0] = 0.0;
|
||||
s[1] = 0.0;
|
||||
s[2] = 1.0 / 3.0;
|
||||
break;
|
||||
case CH4: // CH4 actually needs shape factor for diamond for C, dimer for H
|
||||
case DIA:
|
||||
case DIA3:
|
||||
s[0] = 0.0;
|
||||
s[1] = 0.0;
|
||||
s[2] = 32.0 / 9.0;
|
||||
break;
|
||||
case DIM:
|
||||
s[0] = 1.0;
|
||||
s[1] = 2.0 / 3.0;
|
||||
// s(4) = 1.d0 // this should be 0.4 unless (1-legendre) is multiplied in the density calc.
|
||||
s[2] = 0.40; // this is (1-legendre) where legendre = 0.6 in dynamo is accounted.
|
||||
break;
|
||||
case LIN: // linear, theta being 180
|
||||
s[0] = 0.0;
|
||||
s[1] = 8.0 / 3.0; // 4*(co**4 + si**4 - 1.0/3.0) in zig become 4*(1-1/3)
|
||||
s[2] = 0.0;
|
||||
break;
|
||||
case ZIG: //zig-zag
|
||||
case TRI: //trimer e.g. H2O
|
||||
s[0] = 4.0*pow(cthe,2);
|
||||
s[1] = 4.0*(pow(cthe,4) + pow(sthe,4) - 1.0/3.0);
|
||||
s[2] = 4.0*(pow(cthe,2) * (3*pow(sthe,4) + pow(cthe,4)));
|
||||
s[2] = s[2] - 0.6*s[0]; //legend in dyn, 0.6 is default value.
|
||||
break;
|
||||
default:
|
||||
s[0] = 0.0;
|
||||
// call error('Lattice not defined in get_shpfcn.')
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Number of neighbors for the reference structure
|
||||
//
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int MEAMKokkos<DeviceType>::get_Zij(const lattice_t latt) const
|
||||
{
|
||||
switch (latt) {
|
||||
case FCC:
|
||||
return 12;
|
||||
case BCC:
|
||||
return 8;
|
||||
case HCP:
|
||||
return 12;
|
||||
case DIA:
|
||||
case DIA3:
|
||||
return 4;
|
||||
case DIM:
|
||||
return 1;
|
||||
case B1:
|
||||
return 6;
|
||||
case C11:
|
||||
return 10;
|
||||
case L12:
|
||||
return 12;
|
||||
case B2:
|
||||
return 8;
|
||||
case CH4: // DYNAMO currently implemented this way while it needs two Z values, 4 and 1
|
||||
return 4;
|
||||
case LIN:
|
||||
case ZIG:
|
||||
case TRI:
|
||||
return 2;
|
||||
// call error('Lattice not defined in get_Zij.')
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
68
src/KOKKOS/meam_impl_kokkos.h
Normal file
68
src/KOKKOS/meam_impl_kokkos.h
Normal file
@ -0,0 +1,68 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Naga Vydyanathan (NVIDIA), Stan Moore (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "memory_kokkos.h"
|
||||
#include "meam_kokkos.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
MEAMKokkos<DeviceType>::MEAMKokkos(Memory *mem) : MEAM(mem)
|
||||
{
|
||||
d_errorflag = typename AT::t_int_scalar("meam:errorflag");
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
MEAMKokkos<DeviceType>::~MEAMKokkos()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
MemoryKokkos *memoryKK = (MemoryKokkos *)memory;
|
||||
|
||||
memoryKK->destroy_kokkos(k_rho,rho);
|
||||
memoryKK->destroy_kokkos(k_rho0,rho0);
|
||||
memoryKK->destroy_kokkos(k_rho1,rho1);
|
||||
memoryKK->destroy_kokkos(k_rho2,rho2);
|
||||
memoryKK->destroy_kokkos(k_rho3,rho3);
|
||||
memoryKK->destroy_kokkos(k_frhop,frhop);
|
||||
memoryKK->destroy_kokkos(k_gamma,gamma);
|
||||
memoryKK->destroy_kokkos(k_dgamma1,dgamma1);
|
||||
memoryKK->destroy_kokkos(k_dgamma2,dgamma2);
|
||||
memoryKK->destroy_kokkos(k_dgamma3,dgamma3);
|
||||
memoryKK->destroy_kokkos(k_arho2b,arho2b);
|
||||
|
||||
memoryKK->destroy_kokkos(k_arho1,arho1);
|
||||
memoryKK->destroy_kokkos(k_arho2,arho2);
|
||||
memoryKK->destroy_kokkos(k_arho3,arho3);
|
||||
memoryKK->destroy_kokkos(k_arho3b,arho3b);
|
||||
memoryKK->destroy_kokkos(k_t_ave,t_ave);
|
||||
memoryKK->destroy_kokkos(k_tsq_ave,tsq_ave);
|
||||
|
||||
memoryKK->destroy_kokkos(k_scrfcn,scrfcn);
|
||||
memoryKK->destroy_kokkos(k_dscrfcn,dscrfcn);
|
||||
memoryKK->destroy_kokkos(k_fcpair,fcpair);
|
||||
}
|
||||
|
||||
#include "meam_setup_done_kokkos.h"
|
||||
#include "meam_funcs_kokkos.h"
|
||||
#include "meam_dens_init_kokkos.h"
|
||||
#include "meam_dens_final_kokkos.h"
|
||||
#include "meam_force_kokkos.h"
|
||||
|
||||
224
src/KOKKOS/meam_kokkos.h
Normal file
224
src/KOKKOS/meam_kokkos.h
Normal file
@ -0,0 +1,224 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_MEAMKOKKOS_H
|
||||
#define LMP_MEAMKOKKOS_H
|
||||
|
||||
#include "kokkos.h"
|
||||
#include "meam.h"
|
||||
#include "memory_kokkos.h"
|
||||
#include "neigh_request.h"
|
||||
#include "neighbor_kokkos.h"
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
struct TagMEAMDensFinal {};
|
||||
template <int NEIGHFLAG> struct TagMEAMDensInit {
|
||||
};
|
||||
struct TagMEAMZero {};
|
||||
template <int NEIGHFLAG> struct TagMEAMForce {
|
||||
};
|
||||
|
||||
template <class DeviceType> class MEAMKokkos : public MEAM {
|
||||
public:
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
typedef EV_FLOAT value_type;
|
||||
MEAMKokkos(Memory *mem);
|
||||
~MEAMKokkos() override;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagMEAMDensFinal, const int &, EV_FLOAT &) const;
|
||||
|
||||
template <int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION void operator()(TagMEAMDensInit<NEIGHFLAG>, const int &) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagMEAMZero, const int &) const;
|
||||
|
||||
template <int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION void operator()(TagMEAMForce<NEIGHFLAG>, const int &, EV_FLOAT &) const;
|
||||
|
||||
private:
|
||||
// parameters to meam_dens_init
|
||||
|
||||
int ntype, nlocal;
|
||||
typename AT::t_int_1d type;
|
||||
typename AT::t_int_1d d_offset;
|
||||
typename AT::t_int_1d d_map;
|
||||
typename AT::t_int_2d d_scale;
|
||||
typename AT::t_x_array x;
|
||||
typename AT::t_int_1d d_numneigh_half;
|
||||
typename AT::t_int_1d d_numneigh_full;
|
||||
typename AT::t_neighbors_2d d_neighbors_half;
|
||||
typename AT::t_neighbors_2d d_neighbors_full;
|
||||
typename AT::t_int_1d d_ilist_half;
|
||||
typename AT::t_f_array f;
|
||||
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
|
||||
|
||||
// parameters to meam_dens_final
|
||||
|
||||
typename AT::t_int_scalar d_errorflag;
|
||||
int eflag_either, eflag_global, eflag_atom, vflag_either, vflag_global, vflag_atom;
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
|
||||
|
||||
public:
|
||||
void meam_dens_setup(int, int, int) override;
|
||||
void meam_setup_done(double *) override;
|
||||
void meam_dens_init(int, int, typename AT::t_int_1d, typename AT::t_int_1d,
|
||||
typename AT::t_x_array, typename AT::t_int_1d, typename AT::t_int_1d,
|
||||
typename AT::t_int_1d, typename AT::t_neighbors_2d,
|
||||
typename AT::t_neighbors_2d, typename AT::t_int_1d, int, int);
|
||||
void meam_dens_final(int, int, int, int, typename ArrayTypes<DeviceType>::t_efloat_1d, int,
|
||||
typename AT::t_int_1d, typename AT::t_int_1d, typename AT::t_int_2d, int &,
|
||||
EV_FLOAT &);
|
||||
void meam_force(int, int, int, int, int, typename ArrayTypes<DeviceType>::t_efloat_1d, int,
|
||||
typename AT::t_int_1d, typename AT::t_int_1d, typename AT::t_x_array,
|
||||
typename AT::t_int_1d, typename AT::t_int_1d, typename AT::t_f_array,
|
||||
typename ArrayTypes<DeviceType>::t_virial_array, typename AT::t_int_1d,
|
||||
typename AT::t_int_1d, typename AT::t_neighbors_2d, typename AT::t_neighbors_2d,
|
||||
int, int, EV_FLOAT &);
|
||||
template <int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION void getscreen(int, int, typename AT::t_x_array, typename AT::t_int_1d,
|
||||
typename AT::t_int_1d, int, typename AT::t_int_1d,
|
||||
typename AT::t_int_1d) const;
|
||||
template <int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION void calc_rho1(int, int, typename AT::t_int_1d, typename AT::t_int_1d,
|
||||
typename AT::t_x_array, typename AT::t_int_1d, int) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double fcut(const double xi) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double dfcut(const double xi, double &dfc) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double dCfunc(const double, const double, const double) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void dCfunc2(const double, const double, const double, double &, double &) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double G_gam(const double, const int, int &) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double dG_gam(const double, const int, double &) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double zbl(const double, const int, const int) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double embedding(const double, const double, const double, double &) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double erose(const double, const double, const double, const double, const double, const double,
|
||||
const int) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void get_shpfcn(const lattice_t latt, const double sthe, const double cthe, double (&s)[3]) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int get_Zij(const lattice_t) const;
|
||||
|
||||
public:
|
||||
DAT::tdual_ffloat_1d k_rho, k_rho0, k_rho1, k_rho2, k_rho3, k_frhop;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_1d d_rho, d_rho0, d_rho1, d_rho2, d_rho3, d_frhop;
|
||||
HAT::t_ffloat_1d h_rho, h_rho0, h_rho1, h_rho2, h_rho3, h_frhop;
|
||||
DAT::tdual_ffloat_1d k_gamma, k_dgamma1, k_dgamma2, k_dgamma3, k_arho2b;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_1d d_gamma, d_dgamma1, d_dgamma2, d_dgamma3, d_arho2b;
|
||||
HAT::t_ffloat_1d h_gamma, h_dgamma1, h_dgamma2, h_dgamma3, h_arho2b;
|
||||
DAT::tdual_ffloat_2d k_arho1, k_arho2, k_arho3, k_arho3b, k_t_ave, k_tsq_ave;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_arho1, d_arho2, d_arho3, d_arho3b, d_t_ave,
|
||||
d_tsq_ave;
|
||||
HAT::t_ffloat_2d h_arho1, h_arho2, h_arho3, h_arho3b, h_t_ave, h_tsq_ave;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_phir, d_phirar, d_phirar1, d_phirar2, d_phirar3,
|
||||
d_phirar4, d_phirar5, d_phirar6;
|
||||
DAT::tdual_ffloat_1d k_scrfcn, k_dscrfcn, k_fcpair;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_1d d_scrfcn, d_dscrfcn, d_fcpair;
|
||||
HAT::t_ffloat_1d h_scrfcn, h_dscrfcn, h_fcpair;
|
||||
|
||||
protected:
|
||||
int need_dup;
|
||||
using KKDeviceType = typename KKDevice<DeviceType>::value;
|
||||
|
||||
template <typename DataType, typename Layout>
|
||||
using DupScatterView =
|
||||
KKScatterView<DataType, Layout, KKDeviceType, KKScatterSum, KKScatterDuplicated>;
|
||||
|
||||
template <typename DataType, typename Layout>
|
||||
using NonDupScatterView =
|
||||
KKScatterView<DataType, Layout, KKDeviceType, KKScatterSum, KKScatterNonDuplicated>;
|
||||
|
||||
DupScatterView<typename decltype(d_rho0)::data_type, typename decltype(d_rho0)::array_layout>
|
||||
dup_rho0;
|
||||
NonDupScatterView<typename decltype(d_rho0)::data_type, typename decltype(d_rho0)::array_layout>
|
||||
ndup_rho0;
|
||||
DupScatterView<typename decltype(d_arho2b)::data_type, typename decltype(d_arho2b)::array_layout>
|
||||
dup_arho2b;
|
||||
NonDupScatterView<typename decltype(d_arho2b)::data_type,
|
||||
typename decltype(d_arho2b)::array_layout>
|
||||
ndup_arho2b;
|
||||
DupScatterView<typename decltype(d_arho1)::data_type, typename decltype(d_arho1)::array_layout>
|
||||
dup_arho1;
|
||||
NonDupScatterView<typename decltype(d_arho1)::data_type, typename decltype(d_arho1)::array_layout>
|
||||
ndup_arho1;
|
||||
DupScatterView<typename decltype(d_arho2)::data_type, typename decltype(d_arho2)::array_layout>
|
||||
dup_arho2;
|
||||
NonDupScatterView<typename decltype(d_arho2)::data_type, typename decltype(d_arho2)::array_layout>
|
||||
ndup_arho2;
|
||||
DupScatterView<typename decltype(d_arho3)::data_type, typename decltype(d_arho3)::array_layout>
|
||||
dup_arho3;
|
||||
NonDupScatterView<typename decltype(d_arho3)::data_type, typename decltype(d_arho3)::array_layout>
|
||||
ndup_arho3;
|
||||
DupScatterView<typename decltype(d_arho3b)::data_type, typename decltype(d_arho3b)::array_layout>
|
||||
dup_arho3b;
|
||||
NonDupScatterView<typename decltype(d_arho3b)::data_type,
|
||||
typename decltype(d_arho3b)::array_layout>
|
||||
ndup_arho3b;
|
||||
DupScatterView<typename decltype(d_t_ave)::data_type, typename decltype(d_t_ave)::array_layout>
|
||||
dup_t_ave;
|
||||
NonDupScatterView<typename decltype(d_t_ave)::data_type, typename decltype(d_t_ave)::array_layout>
|
||||
ndup_t_ave;
|
||||
DupScatterView<typename decltype(d_tsq_ave)::data_type,
|
||||
typename decltype(d_tsq_ave)::array_layout>
|
||||
dup_tsq_ave;
|
||||
NonDupScatterView<typename decltype(d_tsq_ave)::data_type,
|
||||
typename decltype(d_tsq_ave)::array_layout>
|
||||
ndup_tsq_ave;
|
||||
DupScatterView<typename decltype(f)::data_type, typename decltype(f)::array_layout> dup_f;
|
||||
NonDupScatterView<typename decltype(f)::data_type, typename decltype(f)::array_layout> ndup_f;
|
||||
DupScatterView<typename decltype(d_eatom)::data_type, typename decltype(d_eatom)::array_layout>
|
||||
dup_eatom;
|
||||
NonDupScatterView<typename decltype(d_eatom)::data_type, typename decltype(d_eatom)::array_layout>
|
||||
ndup_eatom;
|
||||
DupScatterView<typename decltype(d_vatom)::data_type, typename decltype(d_vatom)::array_layout>
|
||||
dup_vatom;
|
||||
NonDupScatterView<typename decltype(d_vatom)::data_type, typename decltype(d_vatom)::array_layout>
|
||||
ndup_vatom;
|
||||
};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static bool iszero_kk(const double f)
|
||||
{
|
||||
return fabs(f) < 1e-20;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static bool isone_kk(const double f)
|
||||
{
|
||||
return fabs(f - 1.0) < 1e-20;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double fdiv_zero_kk(const double n, const double d)
|
||||
{
|
||||
if (iszero_kk(d)) return 0.0;
|
||||
return n / d;
|
||||
}
|
||||
|
||||
// Functions we need for compat
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
#include "meam_impl_kokkos.h"
|
||||
|
||||
#endif
|
||||
60
src/KOKKOS/meam_setup_done_kokkos.h
Normal file
60
src/KOKKOS/meam_setup_done_kokkos.h
Normal file
@ -0,0 +1,60 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "meam_kokkos.h"
|
||||
|
||||
template<class DeviceType>
|
||||
void MEAMKokkos<DeviceType>::meam_setup_done(double* cutmax)
|
||||
{
|
||||
MEAM::meam_setup_done(cutmax);
|
||||
|
||||
MemKK::realloc_kokkos(d_phir, "pair:phir", (neltypes * (neltypes + 1)) / 2, nr);
|
||||
MemKK::realloc_kokkos(d_phirar, "pair:phirar", (neltypes * (neltypes + 1)) / 2, nr);
|
||||
MemKK::realloc_kokkos(d_phirar1, "pair:phirar1", (neltypes * (neltypes + 1)) / 2, nr);
|
||||
MemKK::realloc_kokkos(d_phirar2, "pair:phirar2", (neltypes * (neltypes + 1)) / 2, nr);
|
||||
MemKK::realloc_kokkos(d_phirar3, "pair:phirar3", (neltypes * (neltypes + 1)) / 2, nr);
|
||||
MemKK::realloc_kokkos(d_phirar4, "pair:phirar4", (neltypes * (neltypes + 1)) / 2, nr);
|
||||
MemKK::realloc_kokkos(d_phirar5, "pair:phirar5", (neltypes * (neltypes + 1)) / 2, nr);
|
||||
MemKK::realloc_kokkos(d_phirar6, "pair:phirar6", (neltypes * (neltypes + 1)) / 2, nr);
|
||||
|
||||
auto h_phir = Kokkos::create_mirror_view(d_phir);
|
||||
auto h_phirar = Kokkos::create_mirror_view(d_phirar);
|
||||
auto h_phirar1 = Kokkos::create_mirror_view(d_phirar1);
|
||||
auto h_phirar2 = Kokkos::create_mirror_view(d_phirar2);
|
||||
auto h_phirar3 = Kokkos::create_mirror_view(d_phirar3);
|
||||
auto h_phirar4 = Kokkos::create_mirror_view(d_phirar4);
|
||||
auto h_phirar5 = Kokkos::create_mirror_view(d_phirar5);
|
||||
auto h_phirar6 = Kokkos::create_mirror_view(d_phirar6);
|
||||
|
||||
for (int i = 0; i <(neltypes * (neltypes + 1)) / 2; i++)
|
||||
for(int j = 0; j < nr; j++) {
|
||||
h_phir(i,j) = phir[i][j];
|
||||
h_phirar(i,j) = phirar[i][j];
|
||||
h_phirar1(i,j) = phirar1[i][j];
|
||||
h_phirar2(i,j) = phirar2[i][j];
|
||||
h_phirar3(i,j) = phirar3[i][j];
|
||||
h_phirar4(i,j) = phirar4[i][j];
|
||||
h_phirar5(i,j) = phirar5[i][j];
|
||||
h_phirar6(i,j) = phirar6[i][j];
|
||||
}
|
||||
|
||||
Kokkos::deep_copy(d_phir,h_phir);
|
||||
Kokkos::deep_copy(d_phirar,h_phirar);
|
||||
Kokkos::deep_copy(d_phirar1,h_phirar1);
|
||||
Kokkos::deep_copy(d_phirar2,h_phirar2);
|
||||
Kokkos::deep_copy(d_phirar3,h_phirar3);
|
||||
Kokkos::deep_copy(d_phirar4,h_phirar4);
|
||||
Kokkos::deep_copy(d_phirar5,h_phirar5);
|
||||
Kokkos::deep_copy(d_phirar6,h_phirar6);
|
||||
}
|
||||
753
src/KOKKOS/pair_meam_kokkos.cpp
Normal file
753
src/KOKKOS/pair_meam_kokkos.cpp
Normal file
@ -0,0 +1,753 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Naga Vydyanathan (NVIDIA), Stan Moore (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "pair_meam_kokkos.h"
|
||||
#include "meam_kokkos.h"
|
||||
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "comm.h"
|
||||
#include "error.h"
|
||||
#include "force.h"
|
||||
#include "kokkos.h"
|
||||
#include "memory_kokkos.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
#include "neigh_request.h"
|
||||
#include "neighbor.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
PairMEAMKokkos<DeviceType>::PairMEAMKokkos(LAMMPS *lmp) : PairMEAM(lmp)
|
||||
{
|
||||
respa_enable = 0;
|
||||
|
||||
kokkosable = 1;
|
||||
reverse_comm_device = 1;
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK;
|
||||
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
|
||||
|
||||
delete meam_inst;
|
||||
meam_inst_kk = new MEAMKokkos<DeviceType>(memory);
|
||||
meam_inst = meam_inst_kk;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
PairMEAMKokkos<DeviceType>::~PairMEAMKokkos()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
memoryKK->destroy_kokkos(k_eatom,eatom);
|
||||
memoryKK->destroy_kokkos(k_vatom,vatom);
|
||||
delete meam_inst_kk;
|
||||
meam_inst = nullptr;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairMEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
{
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
ev_init(eflag,vflag,0);
|
||||
|
||||
// reallocate per-atom arrays if necessary
|
||||
|
||||
if (eflag_atom) {
|
||||
memoryKK->destroy_kokkos(k_eatom,eatom);
|
||||
memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
|
||||
d_eatom = k_eatom.view<DeviceType>();
|
||||
}
|
||||
if (vflag_atom) {
|
||||
memoryKK->destroy_kokkos(k_vatom,vatom);
|
||||
memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
|
||||
d_vatom = k_vatom.view<DeviceType>();
|
||||
}
|
||||
|
||||
// neighbor list info
|
||||
|
||||
int inum_half = listhalf->inum;
|
||||
NeighListKokkos<DeviceType>* k_halflist = static_cast<NeighListKokkos<DeviceType>*>(listhalf);
|
||||
d_ilist_half = k_halflist->d_ilist;
|
||||
d_numneigh_half = k_halflist->d_numneigh;
|
||||
d_neighbors_half = k_halflist->d_neighbors;
|
||||
|
||||
NeighListKokkos<DeviceType>* k_fulllist = static_cast<NeighListKokkos<DeviceType>*>(listfull);
|
||||
d_numneigh_full = k_fulllist->d_numneigh;
|
||||
d_neighbors_full = k_fulllist->d_neighbors;
|
||||
|
||||
EV_FLOAT ev;
|
||||
|
||||
copymode = 1;
|
||||
meam_inst_kk->copymode = 1;
|
||||
|
||||
// strip neighbor lists of any special bond flags before using with MEAM
|
||||
// necessary before doing neigh_f2c and neigh_c2f conversions each step
|
||||
|
||||
if (neighbor->ago == 0)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMEAMNeighStrip >(0,inum_half),*this);
|
||||
|
||||
// check size of scrfcn based on half neighbor list
|
||||
|
||||
nlocal = atom->nlocal;
|
||||
nall = nlocal + atom->nghost;
|
||||
|
||||
int n = 0;
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMEAMOffsets>(0,inum_half),*this,n);
|
||||
|
||||
meam_inst_kk->meam_dens_setup(atom->nmax, nall, n);
|
||||
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
f = atomKK->k_f.view<DeviceType>();
|
||||
type = atomKK->k_type.view<DeviceType>();
|
||||
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
|
||||
int ntype = atom->ntypes;
|
||||
|
||||
// 3 stages of MEAM calculation
|
||||
// loop over my atoms followed by communication
|
||||
|
||||
int errorflag = 0;
|
||||
|
||||
d_offset = typename AT::t_int_1d("pair:offset",inum_half+1);
|
||||
{
|
||||
// local variables for lambda capture
|
||||
|
||||
auto l_ilist_half = d_ilist_half;
|
||||
auto l_numneigh_half = d_numneigh_half;
|
||||
auto l_offset = d_offset;
|
||||
|
||||
Kokkos::parallel_scan(inum_half, LAMMPS_LAMBDA(int ii, int &m_fill, bool final) {
|
||||
int i = l_ilist_half[ii];
|
||||
m_fill += l_numneigh_half[i];
|
||||
if (final)
|
||||
l_offset[ii+1] = m_fill;
|
||||
});
|
||||
}
|
||||
|
||||
int need_dup = lmp->kokkos->need_dup<DeviceType>();
|
||||
|
||||
meam_inst_kk->meam_dens_init(inum_half,ntype,type,d_map,x,d_numneigh_half,d_numneigh_full,d_ilist_half,d_neighbors_half, d_neighbors_full, d_offset, neighflag, need_dup);
|
||||
|
||||
meam_inst_kk->k_rho0.template modify<DeviceType>();
|
||||
meam_inst_kk->k_arho2b.template modify<DeviceType>();
|
||||
meam_inst_kk->k_arho1.template modify<DeviceType>();
|
||||
meam_inst_kk->k_arho2.template modify<DeviceType>();
|
||||
meam_inst_kk->k_arho3.template modify<DeviceType>();
|
||||
meam_inst_kk->k_arho3b.template modify<DeviceType>();
|
||||
meam_inst_kk->k_t_ave.template modify<DeviceType>();
|
||||
meam_inst_kk->k_tsq_ave.template modify<DeviceType>();
|
||||
|
||||
comm->reverse_comm(this);
|
||||
|
||||
meam_inst_kk->k_rho0.template sync<DeviceType>();
|
||||
meam_inst_kk->k_arho2b.template sync<DeviceType>();
|
||||
meam_inst_kk->k_arho1.template sync<DeviceType>();
|
||||
meam_inst_kk->k_arho2.template sync<DeviceType>();
|
||||
meam_inst_kk->k_arho3.template sync<DeviceType>();
|
||||
meam_inst_kk->k_arho3b.template sync<DeviceType>();
|
||||
meam_inst_kk->k_t_ave.template sync<DeviceType>();
|
||||
meam_inst_kk->k_tsq_ave.template sync<DeviceType>();
|
||||
|
||||
meam_inst_kk->meam_dens_final(nlocal,eflag_either,eflag_global,eflag_atom,
|
||||
d_eatom,ntype,type,d_map,d_scale,errorflag,ev);
|
||||
|
||||
if (errorflag)
|
||||
error->one(FLERR,"MEAM library error {}",errorflag);
|
||||
|
||||
meam_inst_kk->k_rho0.template modify<DeviceType>();
|
||||
meam_inst_kk->k_rho1.template modify<DeviceType>();
|
||||
meam_inst_kk->k_rho2.template modify<DeviceType>();
|
||||
meam_inst_kk->k_rho3.template modify<DeviceType>();
|
||||
meam_inst_kk->k_frhop.template modify<DeviceType>();
|
||||
meam_inst_kk->k_gamma.template modify<DeviceType>();
|
||||
meam_inst_kk->k_dgamma1.template modify<DeviceType>();
|
||||
meam_inst_kk->k_dgamma2.template modify<DeviceType>();
|
||||
meam_inst_kk->k_dgamma3.template modify<DeviceType>();
|
||||
meam_inst_kk->k_arho2b.template modify<DeviceType>();
|
||||
meam_inst_kk->k_arho1.template modify<DeviceType>();
|
||||
meam_inst_kk->k_arho2.template modify<DeviceType>();
|
||||
meam_inst_kk->k_arho3.template modify<DeviceType>();
|
||||
meam_inst_kk->k_arho3b.template modify<DeviceType>();
|
||||
meam_inst_kk->k_t_ave.template modify<DeviceType>();
|
||||
meam_inst_kk->k_tsq_ave.template modify<DeviceType>();
|
||||
|
||||
comm->forward_comm(this);
|
||||
|
||||
meam_inst_kk->k_rho0.template sync<DeviceType>();
|
||||
meam_inst_kk->k_rho1.template sync<DeviceType>();
|
||||
meam_inst_kk->k_rho2.template sync<DeviceType>();
|
||||
meam_inst_kk->k_rho3.template sync<DeviceType>();
|
||||
meam_inst_kk->k_frhop.template sync<DeviceType>();
|
||||
meam_inst_kk->k_gamma.template sync<DeviceType>();
|
||||
meam_inst_kk->k_dgamma1.template sync<DeviceType>();
|
||||
meam_inst_kk->k_dgamma2.template sync<DeviceType>();
|
||||
meam_inst_kk->k_dgamma3.template sync<DeviceType>();
|
||||
meam_inst_kk->k_arho2b.template sync<DeviceType>();
|
||||
meam_inst_kk->k_arho1.template sync<DeviceType>();
|
||||
meam_inst_kk->k_arho2.template sync<DeviceType>();
|
||||
meam_inst_kk->k_arho3.template sync<DeviceType>();
|
||||
meam_inst_kk->k_arho3b.template sync<DeviceType>();
|
||||
meam_inst_kk->k_t_ave.template sync<DeviceType>();
|
||||
meam_inst_kk->k_tsq_ave.template sync<DeviceType>();
|
||||
|
||||
meam_inst_kk->meam_force(inum_half,eflag_global,eflag_atom,vflag_global,
|
||||
vflag_atom,d_eatom,ntype,type,d_map,x,
|
||||
d_numneigh_half, d_numneigh_full,f,d_vatom,
|
||||
d_ilist_half, d_offset, d_neighbors_half, d_neighbors_full,
|
||||
neighflag, need_dup, ev);
|
||||
|
||||
if (eflag_global) eng_vdwl += ev.evdwl;
|
||||
if (vflag_global) {
|
||||
virial[0] += ev.v[0];
|
||||
virial[1] += ev.v[1];
|
||||
virial[2] += ev.v[2];
|
||||
virial[3] += ev.v[3];
|
||||
virial[4] += ev.v[4];
|
||||
virial[5] += ev.v[5];
|
||||
}
|
||||
|
||||
if (vflag_fdotr) pair_virial_fdotr_compute(this);
|
||||
if (eflag_atom) {
|
||||
k_eatom.template modify<DeviceType>();
|
||||
k_eatom.sync_host();
|
||||
}
|
||||
|
||||
if (vflag_atom) {
|
||||
k_vatom.template modify<DeviceType>();
|
||||
k_vatom.sync_host();
|
||||
}
|
||||
|
||||
if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
|
||||
else atomKK->modified(execution_space,F_MASK);
|
||||
|
||||
copymode = 0;
|
||||
meam_inst_kk->copymode = 0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set coeffs for one or more type pairs
|
||||
------------------------------------------------------------------------- */
|
||||
template<class DeviceType>
|
||||
void PairMEAMKokkos<DeviceType>::coeff(int narg, char **arg)
|
||||
{
|
||||
PairMEAM::coeff(narg,arg);
|
||||
|
||||
// sync map and scale
|
||||
|
||||
int n = atom->ntypes;
|
||||
MemKK::realloc_kokkos(d_map,"pair:map",n+1);
|
||||
MemKK::realloc_kokkos(d_scale,"pair:scale",n+1,n+1);
|
||||
auto h_map = Kokkos::create_mirror_view(d_map);
|
||||
auto h_scale = Kokkos::create_mirror_view(d_scale);
|
||||
|
||||
for (int i = 1; i <= n; i++) {
|
||||
h_map[i] = map[i];
|
||||
for (int j = 1; j <= n; j++)
|
||||
h_scale(i,j) = scale[i][j];
|
||||
}
|
||||
|
||||
Kokkos::deep_copy(d_map,h_map);
|
||||
Kokkos::deep_copy(d_scale,h_scale);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init specific to this pair style
|
||||
------------------------------------------------------------------------- */
|
||||
template<class DeviceType>
|
||||
void PairMEAMKokkos<DeviceType>::init_style()
|
||||
{
|
||||
PairMEAM::init_style();
|
||||
|
||||
// adjust neighbor list request for KOKKOS
|
||||
|
||||
neighflag = lmp->kokkos->neighflag;
|
||||
auto request = neighbor->find_request(this,1);
|
||||
request->set_kokkos_host(std::is_same<DeviceType,LMPHostType>::value &&
|
||||
!std::is_same<DeviceType,LMPDeviceType>::value);
|
||||
request->set_kokkos_device(std::is_same<DeviceType,LMPDeviceType>::value);
|
||||
|
||||
request = neighbor->find_request(this,2);
|
||||
request->set_kokkos_host(std::is_same<DeviceType,LMPHostType>::value &&
|
||||
!std::is_same<DeviceType,LMPDeviceType>::value);
|
||||
request->set_kokkos_device(std::is_same<DeviceType,LMPDeviceType>::value);
|
||||
|
||||
if (neighflag == FULL)
|
||||
error->all(FLERR,"Must use half neighbor list style with pair meam/kk");
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int PairMEAMKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf,
|
||||
int /*pbc_flag*/, int * /*pbc*/)
|
||||
{
|
||||
d_sendlist = k_sendlist.view<DeviceType>();
|
||||
iswap = iswap_in;
|
||||
v_buf = buf.view<DeviceType>();
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMEAMPackForwardComm>(0,n),*this);
|
||||
return n*38;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairMEAMKokkos<DeviceType>::operator()(TagPairMEAMPackForwardComm, const int &i) const {
|
||||
int j = d_sendlist(iswap, i);
|
||||
int m = i*38;
|
||||
v_buf[m++] = meam_inst_kk->d_rho0[j];
|
||||
v_buf[m++] = meam_inst_kk->d_rho1[j];
|
||||
v_buf[m++] = meam_inst_kk->d_rho2[j];
|
||||
v_buf[m++] = meam_inst_kk->d_rho3[j];
|
||||
v_buf[m++] = meam_inst_kk->d_frhop[j];
|
||||
v_buf[m++] = meam_inst_kk->d_gamma[j];
|
||||
v_buf[m++] = meam_inst_kk->d_dgamma1[j];
|
||||
v_buf[m++] = meam_inst_kk->d_dgamma2[j];
|
||||
v_buf[m++] = meam_inst_kk->d_dgamma3[j];
|
||||
v_buf[m++] = meam_inst_kk->d_arho2b[j];
|
||||
v_buf[m++] = meam_inst_kk->d_arho1(j,0);
|
||||
v_buf[m++] = meam_inst_kk->d_arho1(j,1);
|
||||
v_buf[m++] = meam_inst_kk->d_arho1(j,2);
|
||||
v_buf[m++] = meam_inst_kk->d_arho2(j,0);
|
||||
v_buf[m++] = meam_inst_kk->d_arho2(j,1);
|
||||
v_buf[m++] = meam_inst_kk->d_arho2(j,2);
|
||||
v_buf[m++] = meam_inst_kk->d_arho2(j,3);
|
||||
v_buf[m++] = meam_inst_kk->d_arho2(j,4);
|
||||
v_buf[m++] = meam_inst_kk->d_arho2(j,5);
|
||||
for (int k = 0; k < 10; k++) v_buf[m++] = meam_inst_kk->d_arho3(j,k);
|
||||
v_buf[m++] = meam_inst_kk->d_arho3b(j,0);
|
||||
v_buf[m++] = meam_inst_kk->d_arho3b(j,1);
|
||||
v_buf[m++] = meam_inst_kk->d_arho3b(j,2);
|
||||
v_buf[m++] = meam_inst_kk->d_t_ave(j,0);
|
||||
v_buf[m++] = meam_inst_kk->d_t_ave(j,1);
|
||||
v_buf[m++] = meam_inst_kk->d_t_ave(j,2);
|
||||
v_buf[m++] = meam_inst_kk->d_tsq_ave(j,0);
|
||||
v_buf[m++] = meam_inst_kk->d_tsq_ave(j,1);
|
||||
v_buf[m++] = meam_inst_kk->d_tsq_ave(j,2);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairMEAMKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf)
|
||||
{
|
||||
first = first_in;
|
||||
v_buf = buf.view<DeviceType>();
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMEAMUnpackForwardComm>(0,n),*this);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairMEAMKokkos<DeviceType>::operator()(TagPairMEAMUnpackForwardComm, const int &i) const{
|
||||
int m = i*38;
|
||||
|
||||
meam_inst_kk->d_rho0[i+first] = v_buf[m++];
|
||||
meam_inst_kk->d_rho1[i+first] = v_buf[m++];
|
||||
meam_inst_kk->d_rho2[i+first] = v_buf[m++];
|
||||
meam_inst_kk->d_rho3[i+first] = v_buf[m++];
|
||||
meam_inst_kk->d_frhop[i+first] = v_buf[m++];
|
||||
meam_inst_kk->d_gamma[i+first] = v_buf[m++];
|
||||
meam_inst_kk->d_dgamma1[i+first] = v_buf[m++];
|
||||
meam_inst_kk->d_dgamma2[i+first] = v_buf[m++];
|
||||
meam_inst_kk->d_dgamma3[i+first] = v_buf[m++];
|
||||
meam_inst_kk->d_arho2b[i+first] = v_buf[m++];
|
||||
meam_inst_kk->d_arho1(i+first,0) = v_buf[m++];
|
||||
meam_inst_kk->d_arho1(i+first,1) = v_buf[m++];
|
||||
meam_inst_kk->d_arho1(i+first,2) = v_buf[m++];
|
||||
meam_inst_kk->d_arho2(i+first,0) = v_buf[m++];
|
||||
meam_inst_kk->d_arho2(i+first,1) = v_buf[m++];
|
||||
meam_inst_kk->d_arho2(i+first,2) = v_buf[m++];
|
||||
meam_inst_kk->d_arho2(i+first,3) = v_buf[m++];
|
||||
meam_inst_kk->d_arho2(i+first,4) = v_buf[m++];
|
||||
meam_inst_kk->d_arho2(i+first,5) = v_buf[m++];
|
||||
for (int k = 0; k < 10; k++) meam_inst_kk->d_arho3(i+first,k) = v_buf[m++];
|
||||
meam_inst_kk->d_arho3b(i+first,0) = v_buf[m++];
|
||||
meam_inst_kk->d_arho3b(i+first,1) = v_buf[m++];
|
||||
meam_inst_kk->d_arho3b(i+first,2) = v_buf[m++];
|
||||
meam_inst_kk->d_t_ave(i+first,0) = v_buf[m++];
|
||||
meam_inst_kk->d_t_ave(i+first,1) = v_buf[m++];
|
||||
meam_inst_kk->d_t_ave(i+first,2) = v_buf[m++];
|
||||
meam_inst_kk->d_tsq_ave(i+first,0) = v_buf[m++];
|
||||
meam_inst_kk->d_tsq_ave(i+first,1) = v_buf[m++];
|
||||
meam_inst_kk->d_tsq_ave(i+first,2) = v_buf[m++];
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int PairMEAMKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
meam_inst_kk->k_rho0.sync_host();
|
||||
meam_inst_kk->k_rho1.sync_host();
|
||||
meam_inst_kk->k_rho2.sync_host();
|
||||
meam_inst_kk->k_rho3.sync_host();
|
||||
meam_inst_kk->k_frhop.sync_host();
|
||||
meam_inst_kk->k_gamma.sync_host();
|
||||
meam_inst_kk->k_dgamma1.sync_host();
|
||||
meam_inst_kk->k_dgamma2.sync_host();
|
||||
meam_inst_kk->k_dgamma3.sync_host();
|
||||
meam_inst_kk->k_arho2b.sync_host();
|
||||
meam_inst_kk->k_arho1.sync_host();
|
||||
meam_inst_kk->k_arho2.sync_host();
|
||||
meam_inst_kk->k_arho3.sync_host();
|
||||
meam_inst_kk->k_arho3b.sync_host();
|
||||
meam_inst_kk->k_t_ave.sync_host();
|
||||
meam_inst_kk->k_tsq_ave.sync_host();
|
||||
|
||||
int m = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
const int j = list[i];
|
||||
buf[m++] = meam_inst_kk->h_rho0[j];
|
||||
buf[m++] = meam_inst_kk->h_rho1[j];
|
||||
buf[m++] = meam_inst_kk->h_rho2[j];
|
||||
buf[m++] = meam_inst_kk->h_rho3[j];
|
||||
buf[m++] = meam_inst_kk->h_frhop[j];
|
||||
buf[m++] = meam_inst_kk->h_gamma[j];
|
||||
buf[m++] = meam_inst_kk->h_dgamma1[j];
|
||||
buf[m++] = meam_inst_kk->h_dgamma2[j];
|
||||
buf[m++] = meam_inst_kk->h_dgamma3[j];
|
||||
buf[m++] = meam_inst_kk->h_arho2b[j];
|
||||
buf[m++] = meam_inst_kk->h_arho1(j,0);
|
||||
buf[m++] = meam_inst_kk->h_arho1(j,1);
|
||||
buf[m++] = meam_inst_kk->h_arho1(j,2);
|
||||
buf[m++] = meam_inst_kk->h_arho2(j,0);
|
||||
buf[m++] = meam_inst_kk->h_arho2(j,1);
|
||||
buf[m++] = meam_inst_kk->h_arho2(j,2);
|
||||
buf[m++] = meam_inst_kk->h_arho2(j,3);
|
||||
buf[m++] = meam_inst_kk->h_arho2(j,4);
|
||||
buf[m++] = meam_inst_kk->h_arho2(j,5);
|
||||
for (int k = 0; k < 10; k++) buf[m++] = meam_inst_kk->h_arho3(j,k);
|
||||
buf[m++] = meam_inst_kk->h_arho3b(j,0);
|
||||
buf[m++] = meam_inst_kk->h_arho3b(j,1);
|
||||
buf[m++] = meam_inst_kk->h_arho3b(j,2);
|
||||
buf[m++] = meam_inst_kk->h_t_ave(j,0);
|
||||
buf[m++] = meam_inst_kk->h_t_ave(j,1);
|
||||
buf[m++] = meam_inst_kk->h_t_ave(j,2);
|
||||
buf[m++] = meam_inst_kk->h_tsq_ave(j,0);
|
||||
buf[m++] = meam_inst_kk->h_tsq_ave(j,1);
|
||||
buf[m++] = meam_inst_kk->h_tsq_ave(j,2);
|
||||
}
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairMEAMKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *buf)
|
||||
{
|
||||
meam_inst_kk->k_rho0.sync_host();
|
||||
meam_inst_kk->k_rho1.sync_host();
|
||||
meam_inst_kk->k_rho2.sync_host();
|
||||
meam_inst_kk->k_rho3.sync_host();
|
||||
meam_inst_kk->k_frhop.sync_host();
|
||||
meam_inst_kk->k_gamma.sync_host();
|
||||
meam_inst_kk->k_dgamma1.sync_host();
|
||||
meam_inst_kk->k_dgamma2.sync_host();
|
||||
meam_inst_kk->k_dgamma3.sync_host();
|
||||
meam_inst_kk->k_arho2b.sync_host();
|
||||
meam_inst_kk->k_arho1.sync_host();
|
||||
meam_inst_kk->k_arho2.sync_host();
|
||||
meam_inst_kk->k_arho3.sync_host();
|
||||
meam_inst_kk->k_arho3b.sync_host();
|
||||
meam_inst_kk->k_t_ave.sync_host();
|
||||
meam_inst_kk->k_tsq_ave.sync_host();
|
||||
|
||||
int m = 0;
|
||||
const int last = first + n;
|
||||
for (int i = first; i < last; i++) {
|
||||
meam_inst_kk->h_rho0[i] = buf[m++];
|
||||
meam_inst_kk->h_rho1[i] = buf[m++];
|
||||
meam_inst_kk->h_rho2[i] = buf[m++];
|
||||
meam_inst_kk->h_rho3[i] = buf[m++];
|
||||
meam_inst_kk->h_frhop[i] = buf[m++];
|
||||
meam_inst_kk->h_gamma[i] = buf[m++];
|
||||
meam_inst_kk->h_dgamma1[i] = buf[m++];
|
||||
meam_inst_kk->h_dgamma2[i] = buf[m++];
|
||||
meam_inst_kk->h_dgamma3[i] = buf[m++];
|
||||
meam_inst_kk->h_arho2b[i] = buf[m++];
|
||||
meam_inst_kk->h_arho1(i,0) = buf[m++];
|
||||
meam_inst_kk->h_arho1(i,1) = buf[m++];
|
||||
meam_inst_kk->h_arho1(i,2) = buf[m++];
|
||||
meam_inst_kk->h_arho2(i,0) = buf[m++];
|
||||
meam_inst_kk->h_arho2(i,1) = buf[m++];
|
||||
meam_inst_kk->h_arho2(i,2) = buf[m++];
|
||||
meam_inst_kk->h_arho2(i,3) = buf[m++];
|
||||
meam_inst_kk->h_arho2(i,4) = buf[m++];
|
||||
meam_inst_kk->h_arho2(i,5) = buf[m++];
|
||||
for (int k = 0; k < 10; k++) meam_inst_kk->h_arho3(i,k) = buf[m++];
|
||||
meam_inst_kk->h_arho3b(i,0) = buf[m++];
|
||||
meam_inst_kk->h_arho3b(i,1) = buf[m++];
|
||||
meam_inst_kk->h_arho3b(i,2) = buf[m++];
|
||||
meam_inst_kk->h_t_ave(i,0) = buf[m++];
|
||||
meam_inst_kk->h_t_ave(i,1) = buf[m++];
|
||||
meam_inst_kk->h_t_ave(i,2) = buf[m++];
|
||||
meam_inst_kk->h_tsq_ave(i,0) = buf[m++];
|
||||
meam_inst_kk->h_tsq_ave(i,1) = buf[m++];
|
||||
meam_inst_kk->h_tsq_ave(i,2) = buf[m++];
|
||||
}
|
||||
|
||||
meam_inst_kk->k_rho0.modify_host();
|
||||
meam_inst_kk->k_rho1.modify_host();
|
||||
meam_inst_kk->k_rho2.modify_host();
|
||||
meam_inst_kk->k_rho3.modify_host();
|
||||
meam_inst_kk->k_frhop.modify_host();
|
||||
meam_inst_kk->k_gamma.modify_host();
|
||||
meam_inst_kk->k_dgamma1.modify_host();
|
||||
meam_inst_kk->k_dgamma2.modify_host();
|
||||
meam_inst_kk->k_dgamma3.modify_host();
|
||||
meam_inst_kk->k_arho2b.modify_host();
|
||||
meam_inst_kk->k_arho1.modify_host();
|
||||
meam_inst_kk->k_arho2.modify_host();
|
||||
meam_inst_kk->k_arho3.modify_host();
|
||||
meam_inst_kk->k_arho3b.modify_host();
|
||||
meam_inst_kk->k_t_ave.modify_host();
|
||||
meam_inst_kk->k_tsq_ave.modify_host();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int PairMEAMKokkos<DeviceType>::pack_reverse_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf)
|
||||
{
|
||||
first = first_in;
|
||||
v_buf = buf.view<DeviceType>();
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMEAMPackReverseComm>(0,n),*this);
|
||||
return n*30;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairMEAMKokkos<DeviceType>::operator()(TagPairMEAMPackReverseComm, const int &i) const {
|
||||
int m = i*30;
|
||||
|
||||
v_buf[m++] = meam_inst_kk->d_rho0[i+first];
|
||||
v_buf[m++] = meam_inst_kk->d_arho2b[i+first];
|
||||
v_buf[m++] = meam_inst_kk->d_arho1(i+first,0);
|
||||
v_buf[m++] = meam_inst_kk->d_arho1(i+first,1);
|
||||
v_buf[m++] = meam_inst_kk->d_arho1(i+first,2);
|
||||
v_buf[m++] = meam_inst_kk->d_arho2(i+first,0);
|
||||
v_buf[m++] = meam_inst_kk->d_arho2(i+first,1);
|
||||
v_buf[m++] = meam_inst_kk->d_arho2(i+first,2);
|
||||
v_buf[m++] = meam_inst_kk->d_arho2(i+first,3);
|
||||
v_buf[m++] = meam_inst_kk->d_arho2(i+first,4);
|
||||
v_buf[m++] = meam_inst_kk->d_arho2(i+first,5);
|
||||
for (int k = 0; k < 10; k++) v_buf[m++] = meam_inst_kk->d_arho3(i+first,k);
|
||||
v_buf[m++] = meam_inst_kk->d_arho3b(i+first,0);
|
||||
v_buf[m++] = meam_inst_kk->d_arho3b(i+first,1);
|
||||
v_buf[m++] = meam_inst_kk->d_arho3b(i+first,2);
|
||||
v_buf[m++] = meam_inst_kk->d_t_ave(i+first,0);
|
||||
v_buf[m++] = meam_inst_kk->d_t_ave(i+first,1);
|
||||
v_buf[m++] = meam_inst_kk->d_t_ave(i+first,2);
|
||||
v_buf[m++] = meam_inst_kk->d_tsq_ave(i+first,0);
|
||||
v_buf[m++] = meam_inst_kk->d_tsq_ave(i+first,1);
|
||||
v_buf[m++] = meam_inst_kk->d_tsq_ave(i+first,2);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int PairMEAMKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
|
||||
{
|
||||
meam_inst_kk->k_rho0.sync_host();
|
||||
meam_inst_kk->k_arho2b.sync_host();
|
||||
meam_inst_kk->k_arho1.sync_host();
|
||||
meam_inst_kk->k_arho2.sync_host();
|
||||
meam_inst_kk->k_arho3.sync_host();
|
||||
meam_inst_kk->k_arho3b.sync_host();
|
||||
meam_inst_kk->k_t_ave.sync_host();
|
||||
meam_inst_kk->k_tsq_ave.sync_host();
|
||||
|
||||
int m = 0;
|
||||
const int last = first + n;
|
||||
for (int i = first; i < last; i++) {
|
||||
buf[m++] = meam_inst_kk->h_rho0[i];
|
||||
buf[m++] = meam_inst_kk->h_arho2b[i];
|
||||
buf[m++] = meam_inst_kk->h_arho1(i,0);
|
||||
buf[m++] = meam_inst_kk->h_arho1(i,1);
|
||||
buf[m++] = meam_inst_kk->h_arho1(i,2);
|
||||
buf[m++] = meam_inst_kk->h_arho2(i,0);
|
||||
buf[m++] = meam_inst_kk->h_arho2(i,1);
|
||||
buf[m++] = meam_inst_kk->h_arho2(i,2);
|
||||
buf[m++] = meam_inst_kk->h_arho2(i,3);
|
||||
buf[m++] = meam_inst_kk->h_arho2(i,4);
|
||||
buf[m++] = meam_inst_kk->h_arho2(i,5);
|
||||
for (int k = 0; k < 10; k++) buf[m++] = meam_inst_kk->h_arho3(i,k);
|
||||
buf[m++] = meam_inst_kk->h_arho3b(i,0);
|
||||
buf[m++] = meam_inst_kk->h_arho3b(i,1);
|
||||
buf[m++] = meam_inst_kk->h_arho3b(i,2);
|
||||
buf[m++] = meam_inst_kk->h_t_ave(i,0);
|
||||
buf[m++] = meam_inst_kk->h_t_ave(i,1);
|
||||
buf[m++] = meam_inst_kk->h_t_ave(i,2);
|
||||
buf[m++] = meam_inst_kk->h_tsq_ave(i,0);
|
||||
buf[m++] = meam_inst_kk->h_tsq_ave(i,1);
|
||||
buf[m++] = meam_inst_kk->h_tsq_ave(i,2);
|
||||
}
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairMEAMKokkos<DeviceType>::unpack_reverse_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf)
|
||||
{
|
||||
d_sendlist = k_sendlist.view<DeviceType>();
|
||||
iswap = iswap_in;
|
||||
v_buf = buf.view<DeviceType>();
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMEAMUnpackReverseComm>(0,n),*this);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairMEAMKokkos<DeviceType>::operator()(TagPairMEAMUnpackReverseComm, const int &i) const {
|
||||
int j = d_sendlist(iswap, i);
|
||||
int m = i*30;
|
||||
|
||||
meam_inst_kk->d_rho0[j] += v_buf[m++];
|
||||
meam_inst_kk->d_arho2b[j] += v_buf[m++];
|
||||
meam_inst_kk->d_arho1(j,0) += v_buf[m++];
|
||||
meam_inst_kk->d_arho1(j,1) += v_buf[m++];
|
||||
meam_inst_kk->d_arho1(j,2) += v_buf[m++];
|
||||
meam_inst_kk->d_arho2(j,0) += v_buf[m++];
|
||||
meam_inst_kk->d_arho2(j,1) += v_buf[m++];
|
||||
meam_inst_kk->d_arho2(j,2) += v_buf[m++];
|
||||
meam_inst_kk->d_arho2(j,3) += v_buf[m++];
|
||||
meam_inst_kk->d_arho2(j,4) += v_buf[m++];
|
||||
meam_inst_kk->d_arho2(j,5) += v_buf[m++];
|
||||
for (int k = 0; k < 10; k++) meam_inst_kk->d_arho3(j,k) += v_buf[m++];
|
||||
meam_inst_kk->d_arho3b(j,0) += v_buf[m++];
|
||||
meam_inst_kk->d_arho3b(j,1) += v_buf[m++];
|
||||
meam_inst_kk->d_arho3b(j,2) += v_buf[m++];
|
||||
meam_inst_kk->d_t_ave(j,0) += v_buf[m++];
|
||||
meam_inst_kk->d_t_ave(j,1) += v_buf[m++];
|
||||
meam_inst_kk->d_t_ave(j,2) += v_buf[m++];
|
||||
meam_inst_kk->d_tsq_ave(j,0) += v_buf[m++];
|
||||
meam_inst_kk->d_tsq_ave(j,1) += v_buf[m++];
|
||||
meam_inst_kk->d_tsq_ave(j,2) += v_buf[m++];
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairMEAMKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *buf)
|
||||
{
|
||||
meam_inst_kk->k_rho0.sync_host();
|
||||
meam_inst_kk->k_arho2b.sync_host();
|
||||
meam_inst_kk->k_arho1.sync_host();
|
||||
meam_inst_kk->k_arho2.sync_host();
|
||||
meam_inst_kk->k_arho3.sync_host();
|
||||
meam_inst_kk->k_arho3b.sync_host();
|
||||
meam_inst_kk->k_t_ave.sync_host();
|
||||
meam_inst_kk->k_tsq_ave.sync_host();
|
||||
|
||||
int m = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
const int j = list[i];
|
||||
meam_inst_kk->h_rho0[j] += buf[m++];
|
||||
meam_inst_kk->h_arho2b[j] += buf[m++];
|
||||
meam_inst_kk->h_arho1(j,0) += buf[m++];
|
||||
meam_inst_kk->h_arho1(j,1) += buf[m++];
|
||||
meam_inst_kk->h_arho1(j,2) += buf[m++];
|
||||
meam_inst_kk->h_arho2(j,0) += buf[m++];
|
||||
meam_inst_kk->h_arho2(j,1) += buf[m++];
|
||||
meam_inst_kk->h_arho2(j,2) += buf[m++];
|
||||
meam_inst_kk->h_arho2(j,3) += buf[m++];
|
||||
meam_inst_kk->h_arho2(j,4) += buf[m++];
|
||||
meam_inst_kk->h_arho2(j,5) += buf[m++];
|
||||
for (int k = 0; k < 10; k++) meam_inst_kk->h_arho3(j,k) += buf[m++];
|
||||
meam_inst_kk->h_arho3b(j,0) += buf[m++];
|
||||
meam_inst_kk->h_arho3b(j,1) += buf[m++];
|
||||
meam_inst_kk->h_arho3b(j,2) += buf[m++];
|
||||
meam_inst_kk->h_t_ave(j,0) += buf[m++];
|
||||
meam_inst_kk->h_t_ave(j,1) += buf[m++];
|
||||
meam_inst_kk->h_t_ave(j,2) += buf[m++];
|
||||
meam_inst_kk->h_tsq_ave(j,0) += buf[m++];
|
||||
meam_inst_kk->h_tsq_ave(j,1) += buf[m++];
|
||||
meam_inst_kk->h_tsq_ave(j,2) += buf[m++];
|
||||
}
|
||||
|
||||
meam_inst_kk->k_rho0.modify_host();
|
||||
meam_inst_kk->k_arho2b.modify_host();
|
||||
meam_inst_kk->k_arho1.modify_host();
|
||||
meam_inst_kk->k_arho2.modify_host();
|
||||
meam_inst_kk->k_arho3.modify_host();
|
||||
meam_inst_kk->k_arho3b.modify_host();
|
||||
meam_inst_kk->k_t_ave.modify_host();
|
||||
meam_inst_kk->k_tsq_ave.modify_host();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
strip special bond flags from neighbor list entries
|
||||
are not used with MEAM
|
||||
need to do here so Fortran lib doesn't see them
|
||||
done once per reneighbor so that neigh_f2c and neigh_c2f don't see them
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairMEAMKokkos<DeviceType>::operator()(TagPairMEAMNeighStrip, const int &ii) const {
|
||||
|
||||
const int i = d_ilist_half[ii];
|
||||
const int jnum_half = d_numneigh_half[i];
|
||||
const int jnum_full = d_numneigh_full[i];
|
||||
for (int jj = 0; jj < jnum_half; jj++)
|
||||
d_neighbors_half(i,jj) &= NEIGHMASK;
|
||||
for (int jj = 0; jj < jnum_full; jj++)
|
||||
d_neighbors_full(i,jj) &= NEIGHMASK;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairMEAMKokkos<DeviceType>::operator()(TagPairMEAMOffsets, const int ii, int &n) const {
|
||||
const int i = d_ilist_half[ii];
|
||||
n += d_numneigh_half[i];
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class PairMEAMKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
template class PairMEAMKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
||||
|
||||
123
src/KOKKOS/pair_meam_kokkos.h
Normal file
123
src/KOKKOS/pair_meam_kokkos.h
Normal file
@ -0,0 +1,123 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
// clang-format off
|
||||
PairStyle(meam/c/kk,PairMEAMKokkos<LMPDeviceType>)
|
||||
PairStyle(meam/c/kk/device,PairMEAMKokkos<LMPDeviceType>)
|
||||
PairStyle(meam/c/kk/host,PairMEAMKokkos<LMPHostType>)
|
||||
PairStyle(meam/kk,PairMEAMKokkos<LMPDeviceType>)
|
||||
PairStyle(meam/kk/device,PairMEAMKokkos<LMPDeviceType>)
|
||||
PairStyle(meam/kk/host,PairMEAMKokkos<LMPHostType>)
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
// clang-format off
|
||||
#ifndef LMP_PAIR_MEAM_KOKKOS_H
|
||||
#define LMP_PAIR_MEAM_KOKKOS_H
|
||||
|
||||
#include "kokkos_base.h"
|
||||
#include "pair_kokkos.h"
|
||||
#include "pair_meam.h"
|
||||
#include "meam_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
struct TagPairMEAMNeighStrip{};
|
||||
struct TagPairMEAMOffsets{};
|
||||
struct TagPairMEAMPackForwardComm{};
|
||||
struct TagPairMEAMUnpackForwardComm{};
|
||||
struct TagPairMEAMPackReverseComm{};
|
||||
struct TagPairMEAMUnpackReverseComm{};
|
||||
|
||||
template<class DeviceType>
|
||||
class MEAMKokkos;
|
||||
|
||||
template<class DeviceType>
|
||||
class PairMEAMKokkos : public PairMEAM, public KokkosBase {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
typedef int value_type;
|
||||
|
||||
PairMEAMKokkos(class LAMMPS *);
|
||||
~PairMEAMKokkos() override;
|
||||
void compute(int, int) override;
|
||||
void coeff(int, char **) override;
|
||||
void init_style() override;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairMEAMPackForwardComm, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairMEAMUnpackForwardComm, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairMEAMPackReverseComm, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairMEAMUnpackReverseComm, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairMEAMNeighStrip, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPairMEAMOffsets, const int, int&) const;
|
||||
|
||||
int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&,
|
||||
int, int *) override;
|
||||
int pack_forward_comm(int, int *, double *, int, int *) override;
|
||||
void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&) override;
|
||||
void unpack_forward_comm(int, int, double *) override;
|
||||
int pack_reverse_comm_kokkos(int, int, DAT::tdual_xfloat_1d&) override;
|
||||
int pack_reverse_comm(int, int, double *) override;
|
||||
void unpack_reverse_comm_kokkos(int, DAT::tdual_int_2d,
|
||||
int, DAT::tdual_xfloat_1d&) override;
|
||||
void unpack_reverse_comm(int, int *, double *) override;
|
||||
|
||||
protected:
|
||||
class MEAMKokkos<DeviceType> *meam_inst_kk;
|
||||
typename AT::t_x_array x;
|
||||
typename AT::t_f_array f;
|
||||
typename AT::t_int_1d type;
|
||||
|
||||
DAT::tdual_efloat_1d k_eatom;
|
||||
DAT::tdual_virial_array k_vatom;
|
||||
typename AT::t_efloat_1d d_eatom;
|
||||
typename AT::t_virial_array d_vatom;
|
||||
|
||||
typename AT::t_int_1d d_offset;
|
||||
|
||||
DAT::tdual_int_1d k_map;
|
||||
typename AT::t_int_1d d_map;
|
||||
typename AT::t_int_2d d_scale;
|
||||
typename AT::t_int_1d d_ilist_half;
|
||||
typename AT::t_int_1d d_numneigh_half;
|
||||
typename AT::t_neighbors_2d d_neighbors_half;
|
||||
typename AT::t_int_1d d_numneigh_full;
|
||||
typename AT::t_neighbors_2d d_neighbors_full;
|
||||
typename AT::t_int_2d d_sendlist;
|
||||
typename AT::t_xfloat_1d_um v_buf;
|
||||
|
||||
int iswap,first;
|
||||
int neighflag,nlocal,nall,eflag,vflag;
|
||||
|
||||
friend void pair_virial_fdotr_compute<PairMEAMKokkos>(PairMEAMKokkos*);
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -27,9 +27,11 @@ typedef enum { FCC, BCC, HCP, DIM, DIA, DIA3, B1, C11, L12, B2, CH4, LIN, ZIG, T
|
||||
class MEAM {
|
||||
public:
|
||||
MEAM(Memory *mem);
|
||||
~MEAM();
|
||||
virtual ~MEAM();
|
||||
|
||||
private:
|
||||
int copymode;
|
||||
|
||||
protected:
|
||||
Memory *memory;
|
||||
|
||||
// cutforce = force cutoff
|
||||
@ -285,8 +287,8 @@ class MEAM {
|
||||
double *rozero, int *ibar);
|
||||
void meam_setup_param(int which, double value, int nindex, int *index /*index(3)*/,
|
||||
int *errorflag);
|
||||
void meam_setup_done(double *cutmax);
|
||||
void meam_dens_setup(int atom_nmax, int nall, int n_neigh);
|
||||
virtual void meam_setup_done(double *cutmax);
|
||||
virtual void meam_dens_setup(int atom_nmax, int nall, int n_neigh);
|
||||
void meam_dens_init(int i, int ntype, int *type, int *fmap, double **x, int numneigh,
|
||||
int *firstneigh, int numneigh_full, int *firstneigh_full, int fnoffset);
|
||||
void meam_dens_final(int nlocal, int eflag_either, int eflag_global, int eflag_atom,
|
||||
|
||||
@ -36,6 +36,7 @@ MEAM::MEAM(Memory* mem)
|
||||
|
||||
maxneigh = 0;
|
||||
scrfcn = dscrfcn = fcpair = nullptr;
|
||||
copymode = 0;
|
||||
|
||||
neltypes = 0;
|
||||
for (int i = 0; i < maxelt; i++) {
|
||||
@ -53,6 +54,8 @@ MEAM::MEAM(Memory* mem)
|
||||
|
||||
MEAM::~MEAM()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
memory->destroy(this->phirar6);
|
||||
memory->destroy(this->phirar5);
|
||||
memory->destroy(this->phirar4);
|
||||
|
||||
@ -73,7 +73,10 @@ PairMEAM::PairMEAM(LAMMPS *lmp) : Pair(lmp)
|
||||
|
||||
PairMEAM::~PairMEAM()
|
||||
{
|
||||
delete meam_inst;
|
||||
if (copymode) return;
|
||||
|
||||
if (meam_inst)
|
||||
delete meam_inst;
|
||||
|
||||
if (allocated) {
|
||||
memory->destroy(setflag);
|
||||
|
||||
@ -43,7 +43,7 @@ class PairMEAM : public Pair {
|
||||
void unpack_reverse_comm(int, int *, double *) override;
|
||||
double memory_usage() override;
|
||||
|
||||
private:
|
||||
protected:
|
||||
class MEAM *meam_inst;
|
||||
double cutmax; // max cutoff for all elements
|
||||
int nlibelements; // # of library elements
|
||||
|
||||
@ -126,6 +126,7 @@ Pair::Pair(LAMMPS *lmp) : Pointers(lmp)
|
||||
datamask_modify = ALL_MASK;
|
||||
|
||||
kokkosable = 0;
|
||||
reverse_comm_device = 0;
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
|
||||
@ -123,6 +123,7 @@ class Pair : protected Pointers {
|
||||
ExecutionSpace execution_space;
|
||||
unsigned int datamask_read, datamask_modify;
|
||||
int kokkosable; // 1 if Kokkos pair
|
||||
int reverse_comm_device; // 1 if reverse comm on Device
|
||||
|
||||
Pair(class LAMMPS *);
|
||||
~Pair() override;
|
||||
|
||||
Reference in New Issue
Block a user