Fix issues in KOKKOS package
This commit is contained in:
@ -217,7 +217,6 @@ action fix_wall_region_kokkos.cpp
|
||||
action fix_wall_region_kokkos.h
|
||||
action grid3d_kokkos.cpp fft3d.h
|
||||
action grid3d_kokkos.h fft3d.h
|
||||
action group_kokkos.cpp
|
||||
action group_kokkos.h
|
||||
action improper_class2_kokkos.cpp improper_class2.cpp
|
||||
action improper_class2_kokkos.h improper_class2.h
|
||||
|
||||
@ -690,7 +690,7 @@ int FixCMAPKokkos<DeviceType>::pack_exchange_kokkos(
|
||||
|
||||
copymode = 1;
|
||||
|
||||
Kokkos::parallel_scan(nsend, KOKKOS_LAMBDA(const int &mysend, int &offset, const bool &final) {
|
||||
Kokkos::parallel_scan(Kokkos::RangePolicy<DeviceType>(0,nsend), KOKKOS_LAMBDA(const int &mysend, int &offset, const bool &final) {
|
||||
|
||||
const int i = d_exchange_sendlist(mysend);
|
||||
|
||||
@ -782,7 +782,7 @@ void FixCMAPKokkos<DeviceType>::unpack_exchange_kokkos(
|
||||
|
||||
copymode = 1;
|
||||
|
||||
Kokkos::parallel_for(nrecv, KOKKOS_LAMBDA(const int &i) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nrecv), KOKKOS_LAMBDA(const int &i) {
|
||||
int index = d_indices(i);
|
||||
if (index > -1) {
|
||||
int m = d_ubuf(d_buf(i)).i;
|
||||
|
||||
@ -36,7 +36,7 @@ FixMomentumKokkos<DeviceType>::FixMomentumKokkos(LAMMPS *lmp, int narg, char **a
|
||||
{
|
||||
kokkosable = 1;
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
groupKK = (GroupKokkos<DeviceType> *)group;
|
||||
groupKK = (GroupKokkos *)group;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
datamask_read = EMPTY_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
@ -94,7 +94,7 @@ void FixMomentumKokkos<DeviceType>::end_of_step()
|
||||
double ekin_old,ekin_new;
|
||||
ekin_old = ekin_new = 0.0;
|
||||
|
||||
if (dynamic) masstotal = groupKK->mass(igroup);
|
||||
if (dynamic) masstotal = groupKK->mass_kk<DeviceType>(igroup);
|
||||
|
||||
// do nothing if group is empty, i.e. mass is zero;
|
||||
|
||||
@ -109,7 +109,7 @@ void FixMomentumKokkos<DeviceType>::end_of_step()
|
||||
auto groupbit2 = groupbit;
|
||||
if (linear) {
|
||||
double vcm[3];
|
||||
groupKK->vcm(igroup,masstotal,vcm);
|
||||
groupKK->vcm_kk<DeviceType>(igroup,masstotal,vcm);
|
||||
|
||||
// adjust velocities by vcm to zero linear momentum
|
||||
// only adjust a component if flag is set
|
||||
@ -131,9 +131,9 @@ void FixMomentumKokkos<DeviceType>::end_of_step()
|
||||
|
||||
if (angular) {
|
||||
double xcm[3],angmom[3],omega[3],inertia[3][3];
|
||||
groupKK->xcm(igroup,masstotal,xcm);
|
||||
groupKK->angmom(igroup,xcm,angmom);
|
||||
groupKK->inertia(igroup,xcm,inertia);
|
||||
groupKK->xcm_kk<DeviceType>(igroup,masstotal,xcm);
|
||||
groupKK->angmom_kk<DeviceType>(igroup,xcm,angmom);
|
||||
groupKK->inertia_kk<DeviceType>(igroup,xcm,inertia);
|
||||
group->omega(angmom,inertia,omega);
|
||||
|
||||
// adjust velocities to zero omega
|
||||
|
||||
@ -38,7 +38,7 @@ class FixMomentumKokkos : public FixMomentum {
|
||||
FixMomentumKokkos(class LAMMPS *, int, char **);
|
||||
void end_of_step() override;
|
||||
private:
|
||||
GroupKokkos<DeviceType> *groupKK;
|
||||
GroupKokkos *groupKK;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@ -66,7 +66,7 @@ void FixNVELimitKokkos<DeviceType>::initial_integrate(int /*vflag*/)
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space, X_MASK|V_MASK|F_MASK|MASK_MASK|RMASS_MASK );
|
||||
|
||||
Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) {
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) {
|
||||
if (d_mask[i] & l_groupbit) {
|
||||
const double dtfm = l_dtf / d_rmass[i];
|
||||
d_v(i,0) += dtfm * d_f(i,0);
|
||||
@ -95,7 +95,7 @@ void FixNVELimitKokkos<DeviceType>::initial_integrate(int /*vflag*/)
|
||||
auto l_groupbit = groupbit;
|
||||
atomKK->sync(execution_space, X_MASK|V_MASK|F_MASK|MASK_MASK|TYPE_MASK );
|
||||
|
||||
Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) {
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) {
|
||||
if (d_mask[i] & l_groupbit) {
|
||||
const double dtfm = l_dtf / d_mass[d_type[i]];
|
||||
d_v(i,0) += dtfm * d_f(i,0);
|
||||
@ -144,7 +144,7 @@ void FixNVELimitKokkos<DeviceType>::final_integrate()
|
||||
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
|
||||
atomKK->sync(execution_space, V_MASK|F_MASK|MASK_MASK|RMASS_MASK );
|
||||
|
||||
Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) {
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) {
|
||||
if (d_mask[i] & l_groupbit) {
|
||||
const double dtfm = l_dtf / d_rmass[i];
|
||||
d_v(i,0) += dtfm * d_f(i,0);
|
||||
@ -168,7 +168,7 @@ void FixNVELimitKokkos<DeviceType>::final_integrate()
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space, V_MASK|F_MASK|MASK_MASK|TYPE_MASK );
|
||||
|
||||
Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) {
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) {
|
||||
if (d_mask[i] & l_groupbit) {
|
||||
const double dtfm = l_dtf / d_mass[d_type[i]];
|
||||
d_v(i,0) += dtfm * d_f(i,0);
|
||||
|
||||
@ -38,7 +38,7 @@ FixRecenterKokkos<DeviceType>::FixRecenterKokkos(LAMMPS *lmp, int narg, char **a
|
||||
{
|
||||
kokkosable = 1;
|
||||
atomKK = (AtomKokkos *)atom;
|
||||
groupKK = (GroupKokkos<DeviceType> *)group;
|
||||
groupKK = (GroupKokkos *)group;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
datamask_read = X_MASK | MASK_MASK;
|
||||
@ -87,9 +87,10 @@ void FixRecenterKokkos<DeviceType>::initial_integrate(int /*vflag*/)
|
||||
|
||||
// current COM
|
||||
|
||||
if (group->dynamic[igroup]) masstotal = groupKK->mass(igroup);
|
||||
|
||||
if (group->dynamic[igroup]) masstotal = groupKK->mass_kk<DeviceType>(igroup);
|
||||
double xcm[3];
|
||||
groupKK->xcm(igroup,masstotal,xcm);
|
||||
groupKK->xcm_kk<DeviceType>(igroup,masstotal,xcm);
|
||||
|
||||
// shift coords by difference between actual COM and requested COM
|
||||
|
||||
|
||||
@ -36,7 +36,7 @@ class FixRecenterKokkos : public FixRecenter {
|
||||
FixRecenterKokkos(class LAMMPS *, int, char **);
|
||||
void initial_integrate(int) override;
|
||||
private:
|
||||
GroupKokkos<DeviceType> *groupKK;
|
||||
GroupKokkos *groupKK;
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
@ -1859,7 +1859,7 @@ void FixRxKokkos<DeviceType>::computeLocalTemperature()
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
#if 0
|
||||
Kokkos::parallel_for ( inum,
|
||||
Kokkos::parallel_for ( Kokkos::RangePolicy<DeviceType>(0,inum),
|
||||
LAMMPS_LAMBDA(const int ii)
|
||||
{
|
||||
// Create an atomic view of sumWeights and dpdThetaLocal. Only needed
|
||||
@ -1939,7 +1939,7 @@ void FixRxKokkos<DeviceType>::computeLocalTemperature()
|
||||
|
||||
// self-interaction for local temperature
|
||||
#if 0
|
||||
Kokkos::parallel_for ( nlocal,
|
||||
Kokkos::parallel_for ( Kokkos::RangePolicy<DeviceType>(0,nlocal),
|
||||
LAMMPS_LAMBDA(const int i)
|
||||
{
|
||||
double wij = 0.0;
|
||||
|
||||
@ -123,7 +123,7 @@ void FixSpringSelfKokkos<DeviceType>::post_force(int /*vflag*/)
|
||||
auto l_yflag = yflag;
|
||||
auto l_zflag = zflag;
|
||||
|
||||
Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double& espring_kk) {
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), LAMMPS_LAMBDA(const int& i, double& espring_kk) {
|
||||
if (l_mask[i] & l_groupbit) {
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = l_x(i,0);
|
||||
|
||||
@ -15,22 +15,352 @@
|
||||
#define LMP_GROUP_KOKKOS_H
|
||||
|
||||
#include "group.h"
|
||||
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "domain_kokkos.h"
|
||||
#include "kokkos_few.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType>
|
||||
class GroupKokkos : public Group {
|
||||
public:
|
||||
GroupKokkos(class LAMMPS *);
|
||||
double mass(int); // total mass of atoms in group
|
||||
void xcm(int, double, double *); // center-of-mass coords of group
|
||||
void vcm(int, double, double *); // center-of-mass velocity of group
|
||||
void angmom(int, double *, double *); // angular momentum of group
|
||||
void inertia(int, double *, double[3][3]); // inertia tensor
|
||||
// Build on the base Group and store the global atom pointer, cast to AtomKokkos*, in atomKK.
GroupKokkos(LAMMPS *lmp) : Group(lmp) { atomKK = (AtomKokkos *)atom; }
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// computations on a group of atoms
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute the total mass of group of atoms
|
||||
use either per-type mass or per-atom rmass
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
double mass_kk(int igroup)
|
||||
{
|
||||
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
int groupbit = bitmask[igroup];
|
||||
auto d_mask = atomKK->k_mask.template view<DeviceType>();
|
||||
double one = 0.0;
|
||||
|
||||
if (atomKK->rmass) {
|
||||
|
||||
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,MASK_MASK|RMASS_MASK);
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_one) {
|
||||
if (d_mask(i) & groupbit) l_one += d_rmass(i);
|
||||
}, one);
|
||||
|
||||
} else {
|
||||
|
||||
auto d_mass = atomKK->k_mass.template view<DeviceType>();
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,MASK_MASK|TYPE_MASK);
|
||||
atomKK->k_mass.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_one) {
|
||||
if (d_mask(i) & groupbit) l_one += d_mass(d_type(i));
|
||||
}, one);
|
||||
|
||||
}
|
||||
|
||||
double all;
|
||||
MPI_Allreduce(&one, &all, 1, MPI_DOUBLE, MPI_SUM, world);
|
||||
return all;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute the center-of-mass coords of group of atoms
|
||||
masstotal = total mass
|
||||
return center-of-mass coords in cm[]
|
||||
must unwrap atoms to compute center-of-mass correctly
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void xcm_kk(int igroup, double masstotal, double *xcm)
|
||||
{
|
||||
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
int groupbit = bitmask[igroup];
|
||||
auto d_x = atomKK->k_x.template view<DeviceType>();
|
||||
auto d_mask = atomKK->k_mask.template view<DeviceType>();
|
||||
auto d_image = atomKK->k_image.template view<DeviceType>();
|
||||
auto l_prd = Few<double, 3>(domain->prd);
|
||||
auto l_h = Few<double, 6>(domain->h);
|
||||
auto l_triclinic = domain->triclinic;
|
||||
double cmone[3] = {0.0, 0.0, 0.0};
|
||||
|
||||
if (atomKK->rmass) {
|
||||
|
||||
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_rmass(i);
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = d_x(i,0);
|
||||
x_i[1] = d_x(i,1);
|
||||
x_i[2] = d_x(i,2);
|
||||
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
|
||||
l_cmx += unwrapKK[0] * massone;
|
||||
l_cmy += unwrapKK[1] * massone;
|
||||
l_cmz += unwrapKK[2] * massone;
|
||||
}
|
||||
}, cmone[0], cmone[1], cmone[2]);
|
||||
|
||||
} else {
|
||||
|
||||
auto d_mass = atomKK->k_mass.template view<DeviceType>();
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
|
||||
atomKK->k_mass.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_mass(d_type(i));
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = d_x(i,0);
|
||||
x_i[1] = d_x(i,1);
|
||||
x_i[2] = d_x(i,2);
|
||||
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
|
||||
l_cmx += unwrapKK[0] * massone;
|
||||
l_cmy += unwrapKK[1] * massone;
|
||||
l_cmz += unwrapKK[2] * massone;
|
||||
}
|
||||
}, cmone[0], cmone[1], cmone[2]);
|
||||
|
||||
}
|
||||
|
||||
MPI_Allreduce(cmone, xcm, 3, MPI_DOUBLE, MPI_SUM, world);
|
||||
if (masstotal > 0.0) {
|
||||
xcm[0] /= masstotal;
|
||||
xcm[1] /= masstotal;
|
||||
xcm[2] /= masstotal;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute the center-of-mass velocity of group of atoms
|
||||
masstotal = total mass
|
||||
return center-of-mass velocity in vcm[]
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void vcm_kk(int igroup, double masstotal, double *vcm)
|
||||
{
|
||||
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
int groupbit = bitmask[igroup];
|
||||
auto d_v = atomKK->k_v.template view<DeviceType>();
|
||||
auto d_mask = atomKK->k_mask.template view<DeviceType>();
|
||||
auto d_image = atomKK->k_image.template view<DeviceType>();
|
||||
double p[3] = {0.0, 0.0, 0.0};
|
||||
|
||||
if (atomKK->rmass) {
|
||||
|
||||
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_rmass(i);
|
||||
l_px += d_v(i,0) * massone;
|
||||
l_py += d_v(i,1) * massone;
|
||||
l_pz += d_v(i,2) * massone;
|
||||
}
|
||||
}, p[0], p[1], p[2]);
|
||||
|
||||
} else {
|
||||
|
||||
auto d_mass = atomKK->k_mass.template view<DeviceType>();
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
|
||||
atomKK->k_mass.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_mass(d_type(i));
|
||||
l_px += d_v(i,0) * massone;
|
||||
l_py += d_v(i,1) * massone;
|
||||
l_pz += d_v(i,2) * massone;
|
||||
}
|
||||
}, p[0], p[1], p[2]);
|
||||
|
||||
}
|
||||
|
||||
MPI_Allreduce(p, vcm, 3, MPI_DOUBLE, MPI_SUM, world);
|
||||
if (masstotal > 0.0) {
|
||||
vcm[0] /= masstotal;
|
||||
vcm[1] /= masstotal;
|
||||
vcm[2] /= masstotal;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute the angular momentum L (lmom) of group
|
||||
around center-of-mass cm
|
||||
must unwrap atoms to compute L correctly
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void angmom_kk(int igroup, double *xcm, double *lmom)
|
||||
{
|
||||
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
int groupbit = bitmask[igroup];
|
||||
auto d_x = atomKK->k_x.template view<DeviceType>();
|
||||
auto d_v = atomKK->k_v.template view<DeviceType>();
|
||||
auto d_mask = atomKK->k_mask.template view<DeviceType>();
|
||||
auto d_image = atomKK->k_image.template view<DeviceType>();
|
||||
auto l_prd = Few<double, 3>(domain->prd);
|
||||
auto l_h = Few<double, 6>(domain->h);
|
||||
auto l_triclinic = domain->triclinic;
|
||||
auto l_xcm0 = xcm[0];
|
||||
auto l_xcm1 = xcm[1];
|
||||
auto l_xcm2 = xcm[2];
|
||||
double p[3] = {0.0, 0.0, 0.0};
|
||||
|
||||
if (atomKK->rmass) {
|
||||
|
||||
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_rmass(i);
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = d_x(i,0);
|
||||
x_i[1] = d_x(i,1);
|
||||
x_i[2] = d_x(i,2);
|
||||
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
|
||||
double dx = unwrapKK[0] - l_xcm0;
|
||||
double dy = unwrapKK[1] - l_xcm1;
|
||||
double dz = unwrapKK[2] - l_xcm2;
|
||||
l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1));
|
||||
l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2));
|
||||
l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0));
|
||||
}
|
||||
}, p[0], p[1], p[2]);
|
||||
|
||||
} else {
|
||||
|
||||
auto d_mass = atomKK->k_mass.template view<DeviceType>();
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
|
||||
atomKK->k_mass.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_mass(d_type(i));
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = d_x(i,0);
|
||||
x_i[1] = d_x(i,1);
|
||||
x_i[2] = d_x(i,2);
|
||||
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
|
||||
double dx = unwrapKK[0] - l_xcm0;
|
||||
double dy = unwrapKK[1] - l_xcm1;
|
||||
double dz = unwrapKK[2] - l_xcm2;
|
||||
l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1));
|
||||
l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2));
|
||||
l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0));
|
||||
}
|
||||
}, p[0], p[1], p[2]);
|
||||
|
||||
}
|
||||
MPI_Allreduce(p, lmom, 3, MPI_DOUBLE, MPI_SUM, world);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute moment of inertia tensor around center-of-mass xcm of group
|
||||
must unwrap atoms to compute itensor correctly
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void inertia_kk(int igroup, double *xcm, double itensor[3][3])
|
||||
{
|
||||
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
int groupbit = bitmask[igroup];
|
||||
auto d_x = atomKK->k_x.template view<DeviceType>();
|
||||
auto d_mask = atomKK->k_mask.template view<DeviceType>();
|
||||
auto d_image = atomKK->k_image.template view<DeviceType>();
|
||||
auto l_prd = Few<double, 3>(domain->prd);
|
||||
auto l_h = Few<double, 6>(domain->h);
|
||||
auto l_triclinic = domain->triclinic;
|
||||
auto l_xcm0 = xcm[0];
|
||||
auto l_xcm1 = xcm[1];
|
||||
auto l_xcm2 = xcm[2];
|
||||
|
||||
double ione[3][3];
|
||||
for (int i = 0; i < 3; i++)
|
||||
for (int j = 0; j < 3; j++) ione[i][j] = 0.0;
|
||||
|
||||
if (atomKK->rmass) {
|
||||
|
||||
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_rmass(i);
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = d_x(i,0);
|
||||
x_i[1] = d_x(i,1);
|
||||
x_i[2] = d_x(i,2);
|
||||
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
|
||||
double dx = unwrapKK[0] - l_xcm0;
|
||||
double dy = unwrapKK[1] - l_xcm1;
|
||||
double dz = unwrapKK[2] - l_xcm2;
|
||||
l_i00 += massone * (dy * dy + dz * dz);
|
||||
l_i11 += massone * (dx * dx + dz * dz);
|
||||
l_i22 += massone * (dx * dx + dy * dy);
|
||||
l_i01 -= massone * dx * dy;
|
||||
l_i12 -= massone * dy * dz;
|
||||
l_i02 -= massone * dx * dz;
|
||||
}
|
||||
}, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]);
|
||||
|
||||
} else {
|
||||
|
||||
auto d_mass = atomKK->k_mass.template view<DeviceType>();
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
|
||||
atomKK->k_mass.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_mass(d_type(i));
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = d_x(i,0);
|
||||
x_i[1] = d_x(i,1);
|
||||
x_i[2] = d_x(i,2);
|
||||
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
|
||||
double dx = unwrapKK[0] - l_xcm0;
|
||||
double dy = unwrapKK[1] - l_xcm1;
|
||||
double dz = unwrapKK[2] - l_xcm2;
|
||||
l_i00 += massone * (dy * dy + dz * dz);
|
||||
l_i11 += massone * (dx * dx + dz * dz);
|
||||
l_i22 += massone * (dx * dx + dy * dy);
|
||||
l_i01 -= massone * dx * dy;
|
||||
l_i12 -= massone * dy * dz;
|
||||
l_i02 -= massone * dx * dz;
|
||||
}
|
||||
}, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]);
|
||||
|
||||
}
|
||||
|
||||
ione[1][0] = ione[0][1];
|
||||
ione[2][1] = ione[1][2];
|
||||
ione[2][0] = ione[0][2];
|
||||
MPI_Allreduce(&ione[0][0], &itensor[0][0], 9, MPI_DOUBLE, MPI_SUM, world);
|
||||
}
|
||||
|
||||
private:
|
||||
ExecutionSpace execution_space;
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
@ -145,13 +145,13 @@ void MLIAPDataKokkos<DeviceType>::generate_neighdata(class NeighList *list_in, i
|
||||
auto type = atomKK->k_type.view<DeviceType>();
|
||||
auto map=k_pairmliap->k_map.template view<DeviceType>();
|
||||
|
||||
Kokkos::parallel_scan(natomneigh, KOKKOS_LAMBDA (int ii, int &update, const bool final) {
|
||||
Kokkos::parallel_scan(Kokkos::RangePolicy<DeviceType>(0,natomneigh), KOKKOS_LAMBDA (int ii, int &update, const bool final) {
|
||||
if (final)
|
||||
d_ij(ii) = update;
|
||||
update += d_numneighs(ii);
|
||||
});
|
||||
|
||||
Kokkos::parallel_for(natomneigh, KOKKOS_LAMBDA (int ii) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,natomneigh), KOKKOS_LAMBDA (int ii) {
|
||||
int ij = d_ij(ii);
|
||||
const int i = d_ilist[ii];
|
||||
const double xtmp = x(i, 0);
|
||||
@ -183,7 +183,7 @@ void MLIAPDataKokkos<DeviceType>::generate_neighdata(class NeighList *list_in, i
|
||||
d_ielems[ii] = ielem;
|
||||
});
|
||||
|
||||
Kokkos::parallel_for(nmax, KOKKOS_LAMBDA (int i) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nmax), KOKKOS_LAMBDA (int i) {
|
||||
const int itype = type(i);
|
||||
d_elems(i) = map(itype);
|
||||
});
|
||||
@ -225,7 +225,7 @@ void MLIAPDataKokkos<DeviceType>::grow_neigharrays() {
|
||||
auto d_cutsq=k_pairmliap->k_cutsq.template view<DeviceType>();
|
||||
auto h_cutsq=k_pairmliap->k_cutsq.template view<LMPHostType>();
|
||||
auto d_numneighs = k_numneighs.template view<DeviceType>();
|
||||
Kokkos::parallel_reduce(natomneigh, KOKKOS_LAMBDA (int ii, int &contrib) {
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,natomneigh), KOKKOS_LAMBDA (int ii, int &contrib) {
|
||||
const int i = d_ilist[ii];
|
||||
int count=0;
|
||||
const double xtmp = x(i, 0);
|
||||
|
||||
@ -75,7 +75,7 @@ void MLIAPDescriptorSO3Kokkos<DeviceType>::compute_forces(class MLIAPData *data_
|
||||
Kokkos::View<double[6], DeviceType> virial("virial");
|
||||
data->k_pairmliap->k_vatom.template modify<LMPHostType>();
|
||||
data->k_pairmliap->k_vatom.template sync<DeviceType>();
|
||||
Kokkos::parallel_for(data->nlistatoms, KOKKOS_LAMBDA(int ii) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,data->nlistatoms), KOKKOS_LAMBDA(int ii) {
|
||||
double fij[3];
|
||||
const int i = d_iatoms(ii);
|
||||
|
||||
@ -187,7 +187,7 @@ void MLIAPDescriptorSO3Kokkos<DeviceType>::compute_force_gradients(class MLIAPDa
|
||||
|
||||
auto yoffset = data->yoffset, zoffset = data->zoffset, gamma_nnz = data->gamma_nnz;
|
||||
|
||||
Kokkos::parallel_for (data->nlistatoms, KOKKOS_LAMBDA (int ii) {
|
||||
Kokkos::parallel_for (Kokkos::RangePolicy<DeviceType>(0,data->nlistatoms), KOKKOS_LAMBDA (int ii) {
|
||||
const int i = d_iatoms(ii);
|
||||
|
||||
// ensure rij, inside, wj, and rcutij are of size jnum
|
||||
|
||||
@ -147,7 +147,7 @@ void PairMEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
auto l_numneigh_half = d_numneigh_half;
|
||||
auto l_offset = d_offset;
|
||||
|
||||
Kokkos::parallel_scan(inum_half, LAMMPS_LAMBDA(int ii, int &m_fill, bool final) {
|
||||
Kokkos::parallel_scan(Kokkos::RangePolicy<DeviceType>(0,inum_half), LAMMPS_LAMBDA(int ii, int &m_fill, bool final) {
|
||||
int i = l_ilist_half[ii];
|
||||
m_fill += l_numneigh_half[i];
|
||||
if (final)
|
||||
|
||||
@ -302,7 +302,7 @@ void PairMLIAPKokkos<DeviceType>::e_tally(MLIAPData* data)
|
||||
auto d_iatoms = k_data->k_iatoms.template view<DeviceType>();
|
||||
auto d_eatoms = k_data->k_eatoms.template view<DeviceType>();
|
||||
auto d_eatom = k_eatom.template view<DeviceType>();
|
||||
Kokkos::parallel_for(data->nlistatoms, KOKKOS_LAMBDA (int ii) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,data->nlistatoms), KOKKOS_LAMBDA (int ii) {
|
||||
d_eatom(d_iatoms(ii)) = d_eatoms(ii);
|
||||
});
|
||||
k_eatom.modify<DeviceType>();
|
||||
|
||||
@ -532,7 +532,7 @@ int PairPODKokkos<DeviceType>::NeighborCount(t_pod_1i l_numij, double l_rcutsq,
|
||||
auto l_neighbors = d_neighbors;
|
||||
|
||||
// compute number of pairs for each atom i
|
||||
Kokkos::parallel_for("NeighborCount", Kokkos::TeamPolicy<>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) {
|
||||
Kokkos::parallel_for("NeighborCount", Kokkos::TeamPolicy<DeviceType>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const Kokkos::TeamPolicy<DeviceType>::member_type& team) {
|
||||
int i = team.league_rank();
|
||||
int gi = l_ilist(gi1 + i);
|
||||
double xi0 = l_x(gi, 0);
|
||||
@ -555,7 +555,7 @@ int PairPODKokkos<DeviceType>::NeighborCount(t_pod_1i l_numij, double l_rcutsq,
|
||||
});
|
||||
|
||||
// cumulative sum
|
||||
Kokkos::parallel_scan("InclusivePrefixSum", Ni + 1, KOKKOS_LAMBDA(int i, int& update, const bool final) {
|
||||
Kokkos::parallel_scan("InclusivePrefixSum", Kokkos::RangePolicy<DeviceType>(0,Ni + 1), KOKKOS_LAMBDA(int i, int& update, const bool final) {
|
||||
if (i > 0) {
|
||||
update += l_numij(i);
|
||||
if (final) {
|
||||
@ -582,7 +582,7 @@ void PairPODKokkos<DeviceType>::NeighborList(t_pod_1d l_rij, t_pod_1i l_numij,
|
||||
auto l_map = d_map;
|
||||
auto l_type = type;
|
||||
|
||||
Kokkos::parallel_for("NeighborList", Kokkos::TeamPolicy<>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) {
|
||||
Kokkos::parallel_for("NeighborList", Kokkos::TeamPolicy<DeviceType>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const Kokkos::TeamPolicy<DeviceType>::member_type& team) {
|
||||
int i = team.league_rank();
|
||||
int gi = l_ilist(gi1 + i);
|
||||
double xi0 = l_x(gi, 0);
|
||||
@ -622,7 +622,7 @@ void PairPODKokkos<DeviceType>::radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod
|
||||
t_pod_1d l_rij, t_pod_1d l_besselparams, double l_rin, double l_rmax, int l_besseldegree,
|
||||
int l_inversedegree, int l_nbesselpars, int Nij)
|
||||
{
|
||||
Kokkos::parallel_for("ComputeRadialBasis", Nij, KOKKOS_LAMBDA(int n) {
|
||||
Kokkos::parallel_for("ComputeRadialBasis", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int n) {
|
||||
double xij1 = l_rij(0+3*n);
|
||||
double xij2 = l_rij(1+3*n);
|
||||
double xij3 = l_rij(2+3*n);
|
||||
@ -722,7 +722,7 @@ void PairPODKokkos<DeviceType>::radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod
|
||||
template<class DeviceType>
|
||||
void PairPODKokkos<DeviceType>::matrixMultiply(t_pod_1d a, t_pod_1d b, t_pod_1d c, int r1, int c1, int c2)
|
||||
{
|
||||
Kokkos::parallel_for("MatrixMultiply", r1 * c2, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("MatrixMultiply", Kokkos::RangePolicy<DeviceType>(0,r1 * c2), KOKKOS_LAMBDA(int idx) {
|
||||
int j = idx / r1; // Calculate column index
|
||||
int i = idx % r1; // Calculate row index
|
||||
double sum = 0.0;
|
||||
@ -737,7 +737,7 @@ template<class DeviceType>
|
||||
void PairPODKokkos<DeviceType>::angularbasis(t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz,
|
||||
t_pod_1d l_rij, t_pod_1i l_pq3, int l_K3, int N)
|
||||
{
|
||||
Kokkos::parallel_for("AngularBasis", N, KOKKOS_LAMBDA(int j) {
|
||||
Kokkos::parallel_for("AngularBasis", Kokkos::RangePolicy<DeviceType>(0,N), KOKKOS_LAMBDA(int j) {
|
||||
double x = l_rij(j*3 + 0);
|
||||
double y = l_rij(j*3 + 1);
|
||||
double z = l_rij(j*3 + 2);
|
||||
@ -817,7 +817,7 @@ void PairPODKokkos<DeviceType>::radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf
|
||||
{
|
||||
int totalIterations = l_nrbf3 * l_K3 * Ni;
|
||||
if (l_nelements==1) {
|
||||
Kokkos::parallel_for("RadialAngularSum", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("RadialAngularSum", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int k = idx % l_K3;
|
||||
int temp = idx / l_K3;
|
||||
int m = temp % l_nrbf3;
|
||||
@ -835,7 +835,7 @@ void PairPODKokkos<DeviceType>::radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf
|
||||
});
|
||||
}
|
||||
else {
|
||||
Kokkos::parallel_for("RadialAngularSum", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("RadialAngularSum", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int k = idx % l_K3;
|
||||
int temp = idx / l_K3;
|
||||
int m = temp % l_nrbf3;
|
||||
@ -863,7 +863,7 @@ void PairPODKokkos<DeviceType>::twobodydesc(t_pod_1d d2, t_pod_1d l_rbf, t_pod_
|
||||
int l_nrbf2, const int Ni, const int Nij)
|
||||
{
|
||||
int totalIterations = l_nrbf2 * Nij;
|
||||
Kokkos::parallel_for("twobodydesc", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("twobodydesc", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int n = idx / l_nrbf2; // pair index
|
||||
int m = idx % l_nrbf2; // rbd index
|
||||
int i2 = n + Nij * m; // Index of the radial basis function for atom n and RBF m
|
||||
@ -876,7 +876,7 @@ void PairPODKokkos<DeviceType>::twobody_forces(t_pod_1d fij, t_pod_1d cb2, t_pod
|
||||
t_pod_1d l_rbfz, t_pod_1i l_idxi, t_pod_1i l_tj, int l_nrbf2, const int Ni, const int Nij)
|
||||
{
|
||||
int totalIterations = l_nrbf2 * Nij;
|
||||
Kokkos::parallel_for("twobody_forces", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("twobody_forces", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int n = idx / l_nrbf2; // pair index
|
||||
int m = idx % l_nrbf2; // rbd index
|
||||
int i2 = n + Nij * m; // Index of the radial basis function for atom n and RBF m
|
||||
@ -893,7 +893,7 @@ void PairPODKokkos<DeviceType>::threebodydesc(t_pod_1d d3, t_pod_1d l_sumU, t_po
|
||||
int l_nelements, int l_nrbf3, int l_nabf3, int l_K3, const int Ni)
|
||||
{
|
||||
int totalIterations = l_nrbf3 * Ni;
|
||||
Kokkos::parallel_for("ThreeBodyDesc", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("ThreeBodyDesc", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int m = idx % l_nrbf3;
|
||||
int i = idx / l_nrbf3;
|
||||
int nmi = l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3*i;
|
||||
@ -925,7 +925,7 @@ void PairPODKokkos<DeviceType>::threebody_forces(t_pod_1d fij, t_pod_1d cb3, t_p
|
||||
{
|
||||
int totalIterations = l_nrbf3 * Nij;
|
||||
if (l_nelements==1) {
|
||||
Kokkos::parallel_for("threebody_forces1", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("threebody_forces1", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int j = idx / l_nrbf3; // Calculate j using integer division
|
||||
int m = idx % l_nrbf3; // Calculate m using modulo operation
|
||||
int idxR = j + Nij * m; // Pre-compute the index for rbf
|
||||
@ -961,7 +961,7 @@ void PairPODKokkos<DeviceType>::threebody_forces(t_pod_1d fij, t_pod_1d cb3, t_p
|
||||
}
|
||||
else {
|
||||
int N3 = Ni * l_nabf3 * l_nrbf3;
|
||||
Kokkos::parallel_for("threebody_forces2", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("threebody_forces2", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int j = idx / l_nrbf3; // Derive the original j value
|
||||
int m = idx % l_nrbf3; // Derive the original m value
|
||||
int i2 = l_tj(j) - 1;
|
||||
@ -1007,7 +1007,7 @@ void PairPODKokkos<DeviceType>::threebody_forcecoeff(t_pod_1d fb3, t_pod_1d cb3,
|
||||
{
|
||||
int totalIterations = l_nrbf3 * Ni;
|
||||
if (l_nelements==1) {
|
||||
Kokkos::parallel_for("threebody_forcecoeff1", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("threebody_forcecoeff1", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int i = idx / l_nrbf3; // Calculate j using integer division
|
||||
int m = idx % l_nrbf3; // Calculate m using modulo operation
|
||||
for (int p = 0; p < l_nabf3; p++) {
|
||||
@ -1024,7 +1024,7 @@ void PairPODKokkos<DeviceType>::threebody_forcecoeff(t_pod_1d fb3, t_pod_1d cb3,
|
||||
}
|
||||
else {
|
||||
int N3 = Ni * l_nabf3 * l_nrbf3;
|
||||
Kokkos::parallel_for("threebody_forcecoeff2", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("threebody_forcecoeff2", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int i = idx / l_nrbf3; // Derive the original j value
|
||||
int m = idx % l_nrbf3; // Derive the original m value
|
||||
for (int p = 0; p < l_nabf3; p++) {
|
||||
@ -1054,7 +1054,7 @@ void PairPODKokkos<DeviceType>::fourbodydesc(t_pod_1d d4, t_pod_1d l_sumU, t_po
|
||||
t_pod_1i l_pc4, int l_nelements, int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni)
|
||||
{
|
||||
int totalIterations = l_nrbf4 * Ni;
|
||||
Kokkos::parallel_for("fourbodydesc", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("fourbodydesc", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int m = idx % l_nrbf4;
|
||||
int i = idx / l_nrbf4;
|
||||
int idxU = l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * i;
|
||||
@ -1092,7 +1092,7 @@ void PairPODKokkos<DeviceType>::fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_po
|
||||
{
|
||||
int totalIterations = l_nrbf4 * Nij;
|
||||
if (l_nelements==1) {
|
||||
Kokkos::parallel_for("fourbody_forces1", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("fourbody_forces1", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int j = idx / l_nrbf4; // Derive the original j value
|
||||
int m = idx % l_nrbf4; // Derive the original m value
|
||||
int idxU = l_K3 * m + l_K3*l_nrbf3*l_idxi(j);
|
||||
@ -1151,7 +1151,7 @@ void PairPODKokkos<DeviceType>::fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_po
|
||||
}
|
||||
else {
|
||||
int N3 = Ni * l_nabf4 * l_nrbf4;
|
||||
Kokkos::parallel_for("fourbody_forces2", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("fourbody_forces2", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int j = idx / l_nrbf4; // Derive the original j value
|
||||
int m = idx % l_nrbf4; // Derive the original m value
|
||||
int idxM = j + Nij * m;
|
||||
@ -1241,7 +1241,7 @@ void PairPODKokkos<DeviceType>::fourbody_forcecoeff(t_pod_1d fb4, t_pod_1d cb4,
|
||||
{
|
||||
int totalIterations = l_nrbf4 * Ni;
|
||||
if (l_nelements==1) {
|
||||
Kokkos::parallel_for("fourbody_forcecoeff1", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("fourbody_forcecoeff1", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int i = idx / l_nrbf4; // Derive the original j value
|
||||
int m = idx % l_nrbf4; // Derive the original m value
|
||||
int idxU = l_K3 * m + l_K3*l_nrbf3*i;
|
||||
@ -1268,7 +1268,7 @@ void PairPODKokkos<DeviceType>::fourbody_forcecoeff(t_pod_1d fb4, t_pod_1d cb4,
|
||||
}
|
||||
else {
|
||||
int N3 = Ni * l_nabf4 * l_nrbf4;
|
||||
Kokkos::parallel_for("fourbody_forcecoeff2", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("fourbody_forcecoeff2", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int i = idx / l_nrbf4; // Derive the original j value
|
||||
int m = idx % l_nrbf4; // Derive the original m value
|
||||
for (int p = 0; p < l_nabf4; p++) {
|
||||
@ -1311,7 +1311,7 @@ void PairPODKokkos<DeviceType>::allbody_forces(t_pod_1d fij, t_pod_1d l_forcecoe
|
||||
t_pod_1i l_idxi, t_pod_1i l_tj, int l_nelements, int l_nrbf3, int l_K3, int Nij)
|
||||
{
|
||||
int totalIterations = l_nrbf3 * Nij;
|
||||
Kokkos::parallel_for("allbody_forces", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("allbody_forces", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int j = idx / l_nrbf3; // Calculate j using integer division
|
||||
int m = idx % l_nrbf3; // Calculate m using modulo operation
|
||||
int i2 = l_tj(j) - 1;
|
||||
@ -1346,7 +1346,7 @@ template<class DeviceType>
|
||||
void PairPODKokkos<DeviceType>::crossdesc(t_pod_1d d12, t_pod_1d d1, t_pod_1d d2, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni)
|
||||
{
|
||||
int totalIterations = n12 * Ni;
|
||||
Kokkos::parallel_for("crossdesc", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("crossdesc", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int n = idx % Ni;
|
||||
int i = idx / Ni;
|
||||
|
||||
@ -1359,7 +1359,7 @@ void PairPODKokkos<DeviceType>::crossdesc_reduction(t_pod_1d cb1, t_pod_1d cb2,
|
||||
t_pod_1d d2, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni)
|
||||
{
|
||||
int totalIterations = n12 * Ni;
|
||||
Kokkos::parallel_for("crossdesc_reduction", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("crossdesc_reduction", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int n = idx % Ni; // Ni
|
||||
int m = idx / Ni; // n12
|
||||
int k1 = ind1(m); // dd1
|
||||
@ -1375,7 +1375,7 @@ void PairPODKokkos<DeviceType>::crossdesc_reduction(t_pod_1d cb1, t_pod_1d cb2,
|
||||
template<class DeviceType>
|
||||
void PairPODKokkos<DeviceType>::set_array_to_zero(t_pod_1d a, int N)
|
||||
{
|
||||
Kokkos::parallel_for("initialize_array", N, KOKKOS_LAMBDA(int i) {
|
||||
Kokkos::parallel_for("initialize_array", Kokkos::RangePolicy<DeviceType>(0,N), KOKKOS_LAMBDA(int i) {
|
||||
a(i) = 0.0;
|
||||
});
|
||||
}
|
||||
@ -1480,7 +1480,7 @@ void PairPODKokkos<DeviceType>::blockatom_base_coefficients(t_pod_1d ei, t_pod_1
|
||||
int nDes = Mdesc;
|
||||
int nCoeff = nCoeffPerElement;
|
||||
|
||||
Kokkos::parallel_for("atomic_energies", Ni, KOKKOS_LAMBDA(int n) {
|
||||
Kokkos::parallel_for("atomic_energies", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int n) {
|
||||
int nc = nCoeff*(tyai[n]-1);
|
||||
ei[n] = cefs[0 + nc];
|
||||
for (int m=0; m<nDes; m++)
|
||||
@ -1488,7 +1488,7 @@ void PairPODKokkos<DeviceType>::blockatom_base_coefficients(t_pod_1d ei, t_pod_1
|
||||
});
|
||||
|
||||
int totalIterations = Ni*nDes;
|
||||
Kokkos::parallel_for("base_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("base_coefficients", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int n = idx % Ni;
|
||||
int m = idx / Ni;
|
||||
int nc = nCoeff*(tyai[n]-1);
|
||||
@ -1516,7 +1516,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
|
||||
int nCoeff = nCoeffPerElement;
|
||||
|
||||
int totalIterations = Ni*nCom;
|
||||
Kokkos::parallel_for("pca", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("pca", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int i = idx % Ni;
|
||||
int k = idx / Ni;
|
||||
double sum = 0.0;
|
||||
@ -1528,7 +1528,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
|
||||
});
|
||||
|
||||
totalIterations = Ni*nCls;
|
||||
Kokkos::parallel_for("inverse_square_distances", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("inverse_square_distances", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int i = idx % Ni;
|
||||
int j = idx / Ni;
|
||||
int typei = tyai[i]-1;
|
||||
@ -1541,14 +1541,14 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
|
||||
D[i + Ni*j] = 1.0 / sum;
|
||||
});
|
||||
|
||||
Kokkos::parallel_for("Probabilities", Ni, KOKKOS_LAMBDA(int i) {
|
||||
Kokkos::parallel_for("Probabilities", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int i) {
|
||||
double sum = 0;
|
||||
for (int j = 0; j < nCls; j++) sum += D[i + Ni*j];
|
||||
sumD[i] = sum;
|
||||
for (int j = 0; j < nCls; j++) P[i + Ni*j] = D[i + Ni*j]/sum;
|
||||
});
|
||||
|
||||
Kokkos::parallel_for("atomic_energies", Ni, KOKKOS_LAMBDA(int n) {
|
||||
Kokkos::parallel_for("atomic_energies", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int n) {
|
||||
int nc = nCoeff*(tyai[n]-1);
|
||||
ei[n] = cefs[0 + nc];
|
||||
for (int k = 0; k<nCls; k++)
|
||||
@ -1556,7 +1556,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
|
||||
ei[n] += cefs[1 + m + nDes*k + nc]*B[n + Ni*m]*P[n + Ni*k];
|
||||
});
|
||||
|
||||
Kokkos::parallel_for("env_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("env_coefficients", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int n = idx % Ni;
|
||||
int k = idx / Ni;
|
||||
int nc = nCoeff*(tyai[n]-1);
|
||||
@ -1567,7 +1567,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
|
||||
});
|
||||
|
||||
totalIterations = Ni*nDes;
|
||||
Kokkos::parallel_for("base_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("base_coefficients", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int n = idx % Ni;
|
||||
int m = idx / Ni;
|
||||
int nc = nCoeff*(tyai[n]-1);
|
||||
@ -1577,7 +1577,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
|
||||
cb[n + Ni*m] = sum;
|
||||
});
|
||||
|
||||
Kokkos::parallel_for("base_env_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("base_env_coefficients", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int i = idx % Ni;
|
||||
int m = idx / Ni;
|
||||
int typei = tyai[i]-1;
|
||||
@ -1670,7 +1670,7 @@ template<class DeviceType>
|
||||
void PairPODKokkos<DeviceType>::tallyforce(t_pod_1d l_fij, t_pod_1i l_ai, t_pod_1i l_aj, int Nij)
|
||||
{
|
||||
auto l_f = f;
|
||||
Kokkos::parallel_for("TallyForce", Nij, KOKKOS_LAMBDA(int n) {
|
||||
Kokkos::parallel_for("TallyForce", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int n) {
|
||||
int im = l_ai(n);
|
||||
int jm = l_aj(n);
|
||||
int n3 = 3*n;
|
||||
@ -1694,7 +1694,7 @@ void PairPODKokkos<DeviceType>::tallyenergy(t_pod_1d l_ei, int istart, int Ni)
|
||||
// For global energy tally
|
||||
if (eflag_global) {
|
||||
double local_eng_vdwl = 0.0;
|
||||
Kokkos::parallel_reduce("GlobalEnergyTally", Ni, KOKKOS_LAMBDA(int k, E_FLOAT& update) {
|
||||
Kokkos::parallel_reduce("GlobalEnergyTally", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int k, E_FLOAT& update) {
|
||||
update += l_ei(k);
|
||||
}, local_eng_vdwl);
|
||||
|
||||
@ -1704,7 +1704,7 @@ void PairPODKokkos<DeviceType>::tallyenergy(t_pod_1d l_ei, int istart, int Ni)
|
||||
|
||||
// For per-atom energy tally
|
||||
if (eflag_atom) {
|
||||
Kokkos::parallel_for("PerAtomEnergyTally", Ni, KOKKOS_LAMBDA(int k) {
|
||||
Kokkos::parallel_for("PerAtomEnergyTally", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int k) {
|
||||
l_eatom(istart + k) += l_ei(k);
|
||||
});
|
||||
}
|
||||
@ -1718,7 +1718,7 @@ void PairPODKokkos<DeviceType>::tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_po
|
||||
if (vflag_global) {
|
||||
for (int j=0; j<3; j++) {
|
||||
F_FLOAT sum = 0.0;
|
||||
Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) {
|
||||
Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) {
|
||||
int k3 = 3*k;
|
||||
update += l_rij(j + k3) * l_fij(j + k3);
|
||||
}, sum);
|
||||
@ -1726,21 +1726,21 @@ void PairPODKokkos<DeviceType>::tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_po
|
||||
}
|
||||
|
||||
F_FLOAT sum = 0.0;
|
||||
Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) {
|
||||
Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) {
|
||||
int k3 = 3*k;
|
||||
update += l_rij(k3) * l_fij(1 + k3);
|
||||
}, sum);
|
||||
virial[3] -= sum;
|
||||
|
||||
sum = 0.0;
|
||||
Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) {
|
||||
Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) {
|
||||
int k3 = 3*k;
|
||||
update += l_rij(k3) * l_fij(2 + k3);
|
||||
}, sum);
|
||||
virial[4] -= sum;
|
||||
|
||||
sum = 0.0;
|
||||
Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) {
|
||||
Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) {
|
||||
int k3 = 3*k;
|
||||
update += l_rij(1+k3) * l_fij(2+k3);
|
||||
}, sum);
|
||||
@ -1748,7 +1748,7 @@ void PairPODKokkos<DeviceType>::tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_po
|
||||
}
|
||||
|
||||
if (vflag_atom) {
|
||||
Kokkos::parallel_for("PerAtomStressTally", Nij, KOKKOS_LAMBDA(int k) {
|
||||
Kokkos::parallel_for("PerAtomStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k) {
|
||||
int i = l_ai(k);
|
||||
int j = l_aj(k);
|
||||
int k3 = 3*k;
|
||||
|
||||
@ -53,6 +53,8 @@ lmpinstalledpkgs.h
|
||||
lmpgitversion.h
|
||||
mliap_model_python_couple.cpp
|
||||
mliap_model_python_couple.h
|
||||
# removed in Dec 2024
|
||||
group_kokkos.cpp
|
||||
# renamed in September 2024
|
||||
group_ndx.cpp
|
||||
group_ndx.h
|
||||
|
||||
@ -23,6 +23,7 @@
|
||||
#include "comm_kokkos.h" // IWYU pragma: export
|
||||
#include "comm_tiled_kokkos.h" // IWYU pragma: export
|
||||
#include "domain_kokkos.h" // IWYU pragma: export
|
||||
#include "group_kokkos.h" // IWYU pragma: export
|
||||
#include "kokkos.h" // IWYU pragma: export
|
||||
#include "memory_kokkos.h" // IWYU pragma: export
|
||||
#include "modify_kokkos.h" // IWYU pragma: export
|
||||
@ -39,6 +40,7 @@
|
||||
#include "comm_brick.h"
|
||||
#include "comm_tiled.h"
|
||||
#include "domain.h"
|
||||
#include "group.h"
|
||||
#include "memory.h"
|
||||
#include "modify.h"
|
||||
#include "neighbor.h"
|
||||
@ -86,6 +88,11 @@ class DomainKokkos : public Domain {
|
||||
DomainKokkos(class LAMMPS *lmp) : Domain(lmp) {}
|
||||
};
|
||||
|
||||
class GroupKokkos : public Group {
|
||||
public:
|
||||
GroupKokkos(class LAMMPS *lmp) : Group(lmp) {}
|
||||
};
|
||||
|
||||
class NeighborKokkos : public Neighbor {
|
||||
public:
|
||||
NeighborKokkos(class LAMMPS *lmp) : Neighbor(lmp) {}
|
||||
|
||||
@ -872,7 +872,9 @@ void LAMMPS::create()
|
||||
else
|
||||
atom->create_avec("atomic",0,nullptr,1);
|
||||
|
||||
group = new Group(this);
|
||||
if (kokkos) group = new GroupKokkos(this);
|
||||
else group = new Group(this);
|
||||
|
||||
force = new Force(this); // must be after group, to create temperature
|
||||
|
||||
if (kokkos) modify = new ModifyKokkos(this);
|
||||
|
||||
Reference in New Issue
Block a user