Fix issues in KOKKOS package

This commit is contained in:
Stan Moore
2024-12-03 16:20:34 -08:00
parent cd16308d71
commit 50df32f6fe
18 changed files with 421 additions and 80 deletions

View File

@ -217,7 +217,6 @@ action fix_wall_region_kokkos.cpp
action fix_wall_region_kokkos.h
action grid3d_kokkos.cpp fft3d.h
action grid3d_kokkos.h fft3d.h
action group_kokkos.cpp
action group_kokkos.h
action improper_class2_kokkos.cpp improper_class2.cpp
action improper_class2_kokkos.h improper_class2.h

View File

@ -690,7 +690,7 @@ int FixCMAPKokkos<DeviceType>::pack_exchange_kokkos(
copymode = 1;
Kokkos::parallel_scan(nsend, KOKKOS_LAMBDA(const int &mysend, int &offset, const bool &final) {
Kokkos::parallel_scan(Kokkos::RangePolicy<DeviceType>(0,nsend), KOKKOS_LAMBDA(const int &mysend, int &offset, const bool &final) {
const int i = d_exchange_sendlist(mysend);
@ -782,7 +782,7 @@ void FixCMAPKokkos<DeviceType>::unpack_exchange_kokkos(
copymode = 1;
Kokkos::parallel_for(nrecv, KOKKOS_LAMBDA(const int &i) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nrecv), KOKKOS_LAMBDA(const int &i) {
int index = d_indices(i);
if (index > -1) {
int m = d_ubuf(d_buf(i)).i;

View File

@ -36,7 +36,7 @@ FixMomentumKokkos<DeviceType>::FixMomentumKokkos(LAMMPS *lmp, int narg, char **a
{
kokkosable = 1;
atomKK = (AtomKokkos *) atom;
groupKK = (GroupKokkos<DeviceType> *)group;
groupKK = (GroupKokkos *)group;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = EMPTY_MASK;
datamask_modify = EMPTY_MASK;
@ -94,7 +94,7 @@ void FixMomentumKokkos<DeviceType>::end_of_step()
double ekin_old,ekin_new;
ekin_old = ekin_new = 0.0;
if (dynamic) masstotal = groupKK->mass(igroup);
if (dynamic) masstotal = groupKK->mass_kk<DeviceType>(igroup);
// do nothing if group is empty, i.e. mass is zero;
@ -109,7 +109,7 @@ void FixMomentumKokkos<DeviceType>::end_of_step()
auto groupbit2 = groupbit;
if (linear) {
double vcm[3];
groupKK->vcm(igroup,masstotal,vcm);
groupKK->vcm_kk<DeviceType>(igroup,masstotal,vcm);
// adjust velocities by vcm to zero linear momentum
// only adjust a component if flag is set
@ -131,9 +131,9 @@ void FixMomentumKokkos<DeviceType>::end_of_step()
if (angular) {
double xcm[3],angmom[3],omega[3],inertia[3][3];
groupKK->xcm(igroup,masstotal,xcm);
groupKK->angmom(igroup,xcm,angmom);
groupKK->inertia(igroup,xcm,inertia);
groupKK->xcm_kk<DeviceType>(igroup,masstotal,xcm);
groupKK->angmom_kk<DeviceType>(igroup,xcm,angmom);
groupKK->inertia_kk<DeviceType>(igroup,xcm,inertia);
group->omega(angmom,inertia,omega);
// adjust velocities to zero omega

View File

@ -38,7 +38,7 @@ class FixMomentumKokkos : public FixMomentum {
FixMomentumKokkos(class LAMMPS *, int, char **);
void end_of_step() override;
private:
GroupKokkos<DeviceType> *groupKK;
GroupKokkos *groupKK;
};
}

View File

@ -66,7 +66,7 @@ void FixNVELimitKokkos<DeviceType>::initial_integrate(int /*vflag*/)
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space, X_MASK|V_MASK|F_MASK|MASK_MASK|RMASS_MASK );
Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) {
if (d_mask[i] & l_groupbit) {
const double dtfm = l_dtf / d_rmass[i];
d_v(i,0) += dtfm * d_f(i,0);
@ -95,7 +95,7 @@ void FixNVELimitKokkos<DeviceType>::initial_integrate(int /*vflag*/)
auto l_groupbit = groupbit;
atomKK->sync(execution_space, X_MASK|V_MASK|F_MASK|MASK_MASK|TYPE_MASK );
Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) {
if (d_mask[i] & l_groupbit) {
const double dtfm = l_dtf / d_mass[d_type[i]];
d_v(i,0) += dtfm * d_f(i,0);
@ -144,7 +144,7 @@ void FixNVELimitKokkos<DeviceType>::final_integrate()
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
atomKK->sync(execution_space, V_MASK|F_MASK|MASK_MASK|RMASS_MASK );
Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) {
if (d_mask[i] & l_groupbit) {
const double dtfm = l_dtf / d_rmass[i];
d_v(i,0) += dtfm * d_f(i,0);
@ -168,7 +168,7 @@ void FixNVELimitKokkos<DeviceType>::final_integrate()
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space, V_MASK|F_MASK|MASK_MASK|TYPE_MASK );
Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) {
if (d_mask[i] & l_groupbit) {
const double dtfm = l_dtf / d_mass[d_type[i]];
d_v(i,0) += dtfm * d_f(i,0);

View File

@ -38,7 +38,7 @@ FixRecenterKokkos<DeviceType>::FixRecenterKokkos(LAMMPS *lmp, int narg, char **a
{
kokkosable = 1;
atomKK = (AtomKokkos *)atom;
groupKK = (GroupKokkos<DeviceType> *)group;
groupKK = (GroupKokkos *)group;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = X_MASK | MASK_MASK;
@ -87,9 +87,10 @@ void FixRecenterKokkos<DeviceType>::initial_integrate(int /*vflag*/)
// current COM
if (group->dynamic[igroup]) masstotal = groupKK->mass(igroup);
if (group->dynamic[igroup]) masstotal = groupKK->mass_kk<DeviceType>(igroup);
double xcm[3];
groupKK->xcm(igroup,masstotal,xcm);
groupKK->xcm_kk<DeviceType>(igroup,masstotal,xcm);
// shift coords by difference between actual COM and requested COM

View File

@ -36,7 +36,7 @@ class FixRecenterKokkos : public FixRecenter {
FixRecenterKokkos(class LAMMPS *, int, char **);
void initial_integrate(int) override;
private:
GroupKokkos<DeviceType> *groupKK;
GroupKokkos *groupKK;
};
} // namespace LAMMPS_NS

View File

@ -1859,7 +1859,7 @@ void FixRxKokkos<DeviceType>::computeLocalTemperature()
// loop over neighbors of my atoms
#if 0
Kokkos::parallel_for ( inum,
Kokkos::parallel_for ( Kokkos::RangePolicy<DeviceType>(0,inum),
LAMMPS_LAMBDA(const int ii)
{
// Create an atomic view of sumWeights and dpdThetaLocal. Only needed
@ -1939,7 +1939,7 @@ void FixRxKokkos<DeviceType>::computeLocalTemperature()
// self-interaction for local temperature
#if 0
Kokkos::parallel_for ( nlocal,
Kokkos::parallel_for ( Kokkos::RangePolicy<DeviceType>(0,nlocal),
LAMMPS_LAMBDA(const int i)
{
double wij = 0.0;

View File

@ -123,7 +123,7 @@ void FixSpringSelfKokkos<DeviceType>::post_force(int /*vflag*/)
auto l_yflag = yflag;
auto l_zflag = zflag;
Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double& espring_kk) {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), LAMMPS_LAMBDA(const int& i, double& espring_kk) {
if (l_mask[i] & l_groupbit) {
Few<double,3> x_i;
x_i[0] = l_x(i,0);

View File

@ -15,22 +15,352 @@
#define LMP_GROUP_KOKKOS_H
#include "group.h"
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "domain_kokkos.h"
#include "kokkos_few.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
template<class DeviceType>
class GroupKokkos : public Group {
public:
GroupKokkos(class LAMMPS *);
double mass(int); // total mass of atoms in group
void xcm(int, double, double *); // center-of-mass coords of group
void vcm(int, double, double *); // center-of-mass velocity of group
void angmom(int, double *, double *); // angular momentum of group
void inertia(int, double *, double[3][3]); // inertia tensor
// build the base Group, then store atom viewed as an AtomKokkos in atomKK
GroupKokkos(LAMMPS *lmp) : Group(lmp)
{
  atomKK = (AtomKokkos *) atom;
}
// ----------------------------------------------------------------------
// computations on a group of atoms
// ----------------------------------------------------------------------
/* ----------------------------------------------------------------------
compute the total mass of group of atoms
use either per-type mass or per-atom rmass
------------------------------------------------------------------------- */
template<class DeviceType>
double mass_kk(int igroup)
{
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
int groupbit = bitmask[igroup];
auto d_mask = atomKK->k_mask.template view<DeviceType>();
double one = 0.0;
if (atomKK->rmass) {
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
atomKK->sync(execution_space,MASK_MASK|RMASS_MASK);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_one) {
if (d_mask(i) & groupbit) l_one += d_rmass(i);
}, one);
} else {
auto d_mass = atomKK->k_mass.template view<DeviceType>();
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space,MASK_MASK|TYPE_MASK);
atomKK->k_mass.template sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_one) {
if (d_mask(i) & groupbit) l_one += d_mass(d_type(i));
}, one);
}
double all;
MPI_Allreduce(&one, &all, 1, MPI_DOUBLE, MPI_SUM, world);
return all;
}
/* ----------------------------------------------------------------------
compute the center-of-mass coords of group of atoms
masstotal = total mass
return center-of-mass coords in cm[]
must unwrap atoms to compute center-of-mass correctly
------------------------------------------------------------------------- */
template<class DeviceType>
void xcm_kk(int igroup, double masstotal, double *xcm)
{
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
int groupbit = bitmask[igroup];
auto d_x = atomKK->k_x.template view<DeviceType>();
auto d_mask = atomKK->k_mask.template view<DeviceType>();
auto d_image = atomKK->k_image.template view<DeviceType>();
auto l_prd = Few<double, 3>(domain->prd);
auto l_h = Few<double, 6>(domain->h);
auto l_triclinic = domain->triclinic;
double cmone[3] = {0.0, 0.0, 0.0};
if (atomKK->rmass) {
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) {
if (d_mask(i) & groupbit) {
double massone = d_rmass(i);
Few<double,3> x_i;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
l_cmx += unwrapKK[0] * massone;
l_cmy += unwrapKK[1] * massone;
l_cmz += unwrapKK[2] * massone;
}
}, cmone[0], cmone[1], cmone[2]);
} else {
auto d_mass = atomKK->k_mass.template view<DeviceType>();
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
atomKK->k_mass.template sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) {
if (d_mask(i) & groupbit) {
double massone = d_mass(d_type(i));
Few<double,3> x_i;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
l_cmx += unwrapKK[0] * massone;
l_cmy += unwrapKK[1] * massone;
l_cmz += unwrapKK[2] * massone;
}
}, cmone[0], cmone[1], cmone[2]);
}
MPI_Allreduce(cmone, xcm, 3, MPI_DOUBLE, MPI_SUM, world);
if (masstotal > 0.0) {
xcm[0] /= masstotal;
xcm[1] /= masstotal;
xcm[2] /= masstotal;
}
}
/* ----------------------------------------------------------------------
compute the center-of-mass velocity of group of atoms
masstotal = total mass
return center-of-mass velocity in vcm[]
------------------------------------------------------------------------- */
template<class DeviceType>
void vcm_kk(int igroup, double masstotal, double *vcm)
{
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
int groupbit = bitmask[igroup];
auto d_v = atomKK->k_v.template view<DeviceType>();
auto d_mask = atomKK->k_mask.template view<DeviceType>();
auto d_image = atomKK->k_image.template view<DeviceType>();
double p[3] = {0.0, 0.0, 0.0};
if (atomKK->rmass) {
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
if (d_mask(i) & groupbit) {
double massone = d_rmass(i);
l_px += d_v(i,0) * massone;
l_py += d_v(i,1) * massone;
l_pz += d_v(i,2) * massone;
}
}, p[0], p[1], p[2]);
} else {
auto d_mass = atomKK->k_mass.template view<DeviceType>();
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
atomKK->k_mass.template sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
if (d_mask(i) & groupbit) {
double massone = d_mass(d_type(i));
l_px += d_v(i,0) * massone;
l_py += d_v(i,1) * massone;
l_pz += d_v(i,2) * massone;
}
}, p[0], p[1], p[2]);
}
MPI_Allreduce(p, vcm, 3, MPI_DOUBLE, MPI_SUM, world);
if (masstotal > 0.0) {
vcm[0] /= masstotal;
vcm[1] /= masstotal;
vcm[2] /= masstotal;
}
}
/* ----------------------------------------------------------------------
compute the angular momentum L (lmom) of group
around center-of-mass cm
must unwrap atoms to compute L correctly
------------------------------------------------------------------------- */
template<class DeviceType>
void angmom_kk(int igroup, double *xcm, double *lmom)
{
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
int groupbit = bitmask[igroup];
auto d_x = atomKK->k_x.template view<DeviceType>();
auto d_v = atomKK->k_v.template view<DeviceType>();
auto d_mask = atomKK->k_mask.template view<DeviceType>();
auto d_image = atomKK->k_image.template view<DeviceType>();
auto l_prd = Few<double, 3>(domain->prd);
auto l_h = Few<double, 6>(domain->h);
auto l_triclinic = domain->triclinic;
auto l_xcm0 = xcm[0];
auto l_xcm1 = xcm[1];
auto l_xcm2 = xcm[2];
double p[3] = {0.0, 0.0, 0.0};
if (atomKK->rmass) {
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
if (d_mask(i) & groupbit) {
double massone = d_rmass(i);
Few<double,3> x_i;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
double dx = unwrapKK[0] - l_xcm0;
double dy = unwrapKK[1] - l_xcm1;
double dz = unwrapKK[2] - l_xcm2;
l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1));
l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2));
l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0));
}
}, p[0], p[1], p[2]);
} else {
auto d_mass = atomKK->k_mass.template view<DeviceType>();
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
atomKK->k_mass.template sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
if (d_mask(i) & groupbit) {
double massone = d_mass(d_type(i));
Few<double,3> x_i;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
double dx = unwrapKK[0] - l_xcm0;
double dy = unwrapKK[1] - l_xcm1;
double dz = unwrapKK[2] - l_xcm2;
l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1));
l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2));
l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0));
}
}, p[0], p[1], p[2]);
}
MPI_Allreduce(p, lmom, 3, MPI_DOUBLE, MPI_SUM, world);
}
/* ----------------------------------------------------------------------
compute moment of inertia tensor around center-of-mass xcm of group
must unwrap atoms to compute itensor correctly
------------------------------------------------------------------------- */
template<class DeviceType>
void inertia_kk(int igroup, double *xcm, double itensor[3][3])
{
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
int groupbit = bitmask[igroup];
auto d_x = atomKK->k_x.template view<DeviceType>();
auto d_mask = atomKK->k_mask.template view<DeviceType>();
auto d_image = atomKK->k_image.template view<DeviceType>();
auto l_prd = Few<double, 3>(domain->prd);
auto l_h = Few<double, 6>(domain->h);
auto l_triclinic = domain->triclinic;
auto l_xcm0 = xcm[0];
auto l_xcm1 = xcm[1];
auto l_xcm2 = xcm[2];
double ione[3][3];
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++) ione[i][j] = 0.0;
if (atomKK->rmass) {
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) {
if (d_mask(i) & groupbit) {
double massone = d_rmass(i);
Few<double,3> x_i;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
double dx = unwrapKK[0] - l_xcm0;
double dy = unwrapKK[1] - l_xcm1;
double dz = unwrapKK[2] - l_xcm2;
l_i00 += massone * (dy * dy + dz * dz);
l_i11 += massone * (dx * dx + dz * dz);
l_i22 += massone * (dx * dx + dy * dy);
l_i01 -= massone * dx * dy;
l_i12 -= massone * dy * dz;
l_i02 -= massone * dx * dz;
}
}, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]);
} else {
auto d_mass = atomKK->k_mass.template view<DeviceType>();
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
atomKK->k_mass.template sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) {
if (d_mask(i) & groupbit) {
double massone = d_mass(d_type(i));
Few<double,3> x_i;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
double dx = unwrapKK[0] - l_xcm0;
double dy = unwrapKK[1] - l_xcm1;
double dz = unwrapKK[2] - l_xcm2;
l_i00 += massone * (dy * dy + dz * dz);
l_i11 += massone * (dx * dx + dz * dz);
l_i22 += massone * (dx * dx + dy * dy);
l_i01 -= massone * dx * dy;
l_i12 -= massone * dy * dz;
l_i02 -= massone * dx * dz;
}
}, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]);
}
ione[1][0] = ione[0][1];
ione[2][1] = ione[1][2];
ione[2][0] = ione[0][2];
MPI_Allreduce(&ione[0][0], &itensor[0][0], 9, MPI_DOUBLE, MPI_SUM, world);
}
private:
// NOTE(review): this member is not set by the constructor shown in this class,
// and every *_kk method declares its own local named execution_space that
// shadows it -- it looks unused here; confirm before relying on or removing it
ExecutionSpace execution_space;
};
} // namespace LAMMPS_NS

View File

@ -145,13 +145,13 @@ void MLIAPDataKokkos<DeviceType>::generate_neighdata(class NeighList *list_in, i
auto type = atomKK->k_type.view<DeviceType>();
auto map=k_pairmliap->k_map.template view<DeviceType>();
Kokkos::parallel_scan(natomneigh, KOKKOS_LAMBDA (int ii, int &update, const bool final) {
Kokkos::parallel_scan(Kokkos::RangePolicy<DeviceType>(0,natomneigh), KOKKOS_LAMBDA (int ii, int &update, const bool final) {
if (final)
d_ij(ii) = update;
update += d_numneighs(ii);
});
Kokkos::parallel_for(natomneigh, KOKKOS_LAMBDA (int ii) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,natomneigh), KOKKOS_LAMBDA (int ii) {
int ij = d_ij(ii);
const int i = d_ilist[ii];
const double xtmp = x(i, 0);
@ -183,7 +183,7 @@ void MLIAPDataKokkos<DeviceType>::generate_neighdata(class NeighList *list_in, i
d_ielems[ii] = ielem;
});
Kokkos::parallel_for(nmax, KOKKOS_LAMBDA (int i) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nmax), KOKKOS_LAMBDA (int i) {
const int itype = type(i);
d_elems(i) = map(itype);
});
@ -225,7 +225,7 @@ void MLIAPDataKokkos<DeviceType>::grow_neigharrays() {
auto d_cutsq=k_pairmliap->k_cutsq.template view<DeviceType>();
auto h_cutsq=k_pairmliap->k_cutsq.template view<LMPHostType>();
auto d_numneighs = k_numneighs.template view<DeviceType>();
Kokkos::parallel_reduce(natomneigh, KOKKOS_LAMBDA (int ii, int &contrib) {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,natomneigh), KOKKOS_LAMBDA (int ii, int &contrib) {
const int i = d_ilist[ii];
int count=0;
const double xtmp = x(i, 0);

View File

@ -75,7 +75,7 @@ void MLIAPDescriptorSO3Kokkos<DeviceType>::compute_forces(class MLIAPData *data_
Kokkos::View<double[6], DeviceType> virial("virial");
data->k_pairmliap->k_vatom.template modify<LMPHostType>();
data->k_pairmliap->k_vatom.template sync<DeviceType>();
Kokkos::parallel_for(data->nlistatoms, KOKKOS_LAMBDA(int ii) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,data->nlistatoms), KOKKOS_LAMBDA(int ii) {
double fij[3];
const int i = d_iatoms(ii);
@ -187,7 +187,7 @@ void MLIAPDescriptorSO3Kokkos<DeviceType>::compute_force_gradients(class MLIAPDa
auto yoffset = data->yoffset, zoffset = data->zoffset, gamma_nnz = data->gamma_nnz;
Kokkos::parallel_for (data->nlistatoms, KOKKOS_LAMBDA (int ii) {
Kokkos::parallel_for (Kokkos::RangePolicy<DeviceType>(0,data->nlistatoms), KOKKOS_LAMBDA (int ii) {
const int i = d_iatoms(ii);
// ensure rij, inside, wj, and rcutij are of size jnum

View File

@ -147,7 +147,7 @@ void PairMEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
auto l_numneigh_half = d_numneigh_half;
auto l_offset = d_offset;
Kokkos::parallel_scan(inum_half, LAMMPS_LAMBDA(int ii, int &m_fill, bool final) {
Kokkos::parallel_scan(Kokkos::RangePolicy<DeviceType>(0,inum_half), LAMMPS_LAMBDA(int ii, int &m_fill, bool final) {
int i = l_ilist_half[ii];
m_fill += l_numneigh_half[i];
if (final)

View File

@ -302,7 +302,7 @@ void PairMLIAPKokkos<DeviceType>::e_tally(MLIAPData* data)
auto d_iatoms = k_data->k_iatoms.template view<DeviceType>();
auto d_eatoms = k_data->k_eatoms.template view<DeviceType>();
auto d_eatom = k_eatom.template view<DeviceType>();
Kokkos::parallel_for(data->nlistatoms, KOKKOS_LAMBDA (int ii) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,data->nlistatoms), KOKKOS_LAMBDA (int ii) {
d_eatom(d_iatoms(ii)) = d_eatoms(ii);
});
k_eatom.modify<DeviceType>();

View File

@ -532,7 +532,7 @@ int PairPODKokkos<DeviceType>::NeighborCount(t_pod_1i l_numij, double l_rcutsq,
auto l_neighbors = d_neighbors;
// compute number of pairs for each atom i
Kokkos::parallel_for("NeighborCount", Kokkos::TeamPolicy<>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) {
// one team per atom i in [0, Ni); member_type depends on DeviceType, so it is named with 'typename'
Kokkos::parallel_for("NeighborCount", Kokkos::TeamPolicy<DeviceType>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team) {
int i = team.league_rank();
int gi = l_ilist(gi1 + i);
double xi0 = l_x(gi, 0);
@ -555,7 +555,7 @@ int PairPODKokkos<DeviceType>::NeighborCount(t_pod_1i l_numij, double l_rcutsq,
});
// cumulative sum
Kokkos::parallel_scan("InclusivePrefixSum", Ni + 1, KOKKOS_LAMBDA(int i, int& update, const bool final) {
Kokkos::parallel_scan("InclusivePrefixSum", Kokkos::RangePolicy<DeviceType>(0,Ni + 1), KOKKOS_LAMBDA(int i, int& update, const bool final) {
if (i > 0) {
update += l_numij(i);
if (final) {
@ -582,7 +582,7 @@ void PairPODKokkos<DeviceType>::NeighborList(t_pod_1d l_rij, t_pod_1i l_numij,
auto l_map = d_map;
auto l_type = type;
Kokkos::parallel_for("NeighborList", Kokkos::TeamPolicy<>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) {
// one team per atom i in [0, Ni); member_type depends on DeviceType, so it is named with 'typename'
Kokkos::parallel_for("NeighborList", Kokkos::TeamPolicy<DeviceType>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team) {
int i = team.league_rank();
int gi = l_ilist(gi1 + i);
double xi0 = l_x(gi, 0);
@ -622,7 +622,7 @@ void PairPODKokkos<DeviceType>::radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod
t_pod_1d l_rij, t_pod_1d l_besselparams, double l_rin, double l_rmax, int l_besseldegree,
int l_inversedegree, int l_nbesselpars, int Nij)
{
Kokkos::parallel_for("ComputeRadialBasis", Nij, KOKKOS_LAMBDA(int n) {
Kokkos::parallel_for("ComputeRadialBasis", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int n) {
double xij1 = l_rij(0+3*n);
double xij2 = l_rij(1+3*n);
double xij3 = l_rij(2+3*n);
@ -722,7 +722,7 @@ void PairPODKokkos<DeviceType>::radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod
template<class DeviceType>
void PairPODKokkos<DeviceType>::matrixMultiply(t_pod_1d a, t_pod_1d b, t_pod_1d c, int r1, int c1, int c2)
{
Kokkos::parallel_for("MatrixMultiply", r1 * c2, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("MatrixMultiply", Kokkos::RangePolicy<DeviceType>(0,r1 * c2), KOKKOS_LAMBDA(int idx) {
int j = idx / r1; // Calculate column index
int i = idx % r1; // Calculate row index
double sum = 0.0;
@ -737,7 +737,7 @@ template<class DeviceType>
void PairPODKokkos<DeviceType>::angularbasis(t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz,
t_pod_1d l_rij, t_pod_1i l_pq3, int l_K3, int N)
{
Kokkos::parallel_for("AngularBasis", N, KOKKOS_LAMBDA(int j) {
Kokkos::parallel_for("AngularBasis", Kokkos::RangePolicy<DeviceType>(0,N), KOKKOS_LAMBDA(int j) {
double x = l_rij(j*3 + 0);
double y = l_rij(j*3 + 1);
double z = l_rij(j*3 + 2);
@ -817,7 +817,7 @@ void PairPODKokkos<DeviceType>::radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf
{
int totalIterations = l_nrbf3 * l_K3 * Ni;
if (l_nelements==1) {
Kokkos::parallel_for("RadialAngularSum", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("RadialAngularSum", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int k = idx % l_K3;
int temp = idx / l_K3;
int m = temp % l_nrbf3;
@ -835,7 +835,7 @@ void PairPODKokkos<DeviceType>::radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf
});
}
else {
Kokkos::parallel_for("RadialAngularSum", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("RadialAngularSum", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int k = idx % l_K3;
int temp = idx / l_K3;
int m = temp % l_nrbf3;
@ -863,7 +863,7 @@ void PairPODKokkos<DeviceType>::twobodydesc(t_pod_1d d2, t_pod_1d l_rbf, t_pod_
int l_nrbf2, const int Ni, const int Nij)
{
int totalIterations = l_nrbf2 * Nij;
Kokkos::parallel_for("twobodydesc", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("twobodydesc", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int n = idx / l_nrbf2; // pair index
int m = idx % l_nrbf2; // rbd index
int i2 = n + Nij * m; // Index of the radial basis function for atom n and RBF m
@ -876,7 +876,7 @@ void PairPODKokkos<DeviceType>::twobody_forces(t_pod_1d fij, t_pod_1d cb2, t_pod
t_pod_1d l_rbfz, t_pod_1i l_idxi, t_pod_1i l_tj, int l_nrbf2, const int Ni, const int Nij)
{
int totalIterations = l_nrbf2 * Nij;
Kokkos::parallel_for("twobody_forces", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("twobody_forces", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int n = idx / l_nrbf2; // pair index
int m = idx % l_nrbf2; // rbd index
int i2 = n + Nij * m; // Index of the radial basis function for atom n and RBF m
@ -893,7 +893,7 @@ void PairPODKokkos<DeviceType>::threebodydesc(t_pod_1d d3, t_pod_1d l_sumU, t_po
int l_nelements, int l_nrbf3, int l_nabf3, int l_K3, const int Ni)
{
int totalIterations = l_nrbf3 * Ni;
Kokkos::parallel_for("ThreeBodyDesc", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("ThreeBodyDesc", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int m = idx % l_nrbf3;
int i = idx / l_nrbf3;
int nmi = l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3*i;
@ -925,7 +925,7 @@ void PairPODKokkos<DeviceType>::threebody_forces(t_pod_1d fij, t_pod_1d cb3, t_p
{
int totalIterations = l_nrbf3 * Nij;
if (l_nelements==1) {
Kokkos::parallel_for("threebody_forces1", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("threebody_forces1", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int j = idx / l_nrbf3; // Calculate j using integer division
int m = idx % l_nrbf3; // Calculate m using modulo operation
int idxR = j + Nij * m; // Pre-compute the index for rbf
@ -961,7 +961,7 @@ void PairPODKokkos<DeviceType>::threebody_forces(t_pod_1d fij, t_pod_1d cb3, t_p
}
else {
int N3 = Ni * l_nabf3 * l_nrbf3;
Kokkos::parallel_for("threebody_forces2", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("threebody_forces2", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int j = idx / l_nrbf3; // Derive the original j value
int m = idx % l_nrbf3; // Derive the original m value
int i2 = l_tj(j) - 1;
@ -1007,7 +1007,7 @@ void PairPODKokkos<DeviceType>::threebody_forcecoeff(t_pod_1d fb3, t_pod_1d cb3,
{
int totalIterations = l_nrbf3 * Ni;
if (l_nelements==1) {
Kokkos::parallel_for("threebody_forcecoeff1", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("threebody_forcecoeff1", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int i = idx / l_nrbf3; // Calculate i using integer division
int m = idx % l_nrbf3; // Calculate m using modulo operation
for (int p = 0; p < l_nabf3; p++) {
@ -1024,7 +1024,7 @@ void PairPODKokkos<DeviceType>::threebody_forcecoeff(t_pod_1d fb3, t_pod_1d cb3,
}
else {
int N3 = Ni * l_nabf3 * l_nrbf3;
Kokkos::parallel_for("threebody_forcecoeff2", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("threebody_forcecoeff2", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int i = idx / l_nrbf3; // Derive the original i value
int m = idx % l_nrbf3; // Derive the original m value
for (int p = 0; p < l_nabf3; p++) {
@ -1054,7 +1054,7 @@ void PairPODKokkos<DeviceType>::fourbodydesc(t_pod_1d d4, t_pod_1d l_sumU, t_po
t_pod_1i l_pc4, int l_nelements, int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni)
{
int totalIterations = l_nrbf4 * Ni;
Kokkos::parallel_for("fourbodydesc", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("fourbodydesc", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int m = idx % l_nrbf4;
int i = idx / l_nrbf4;
int idxU = l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * i;
@ -1092,7 +1092,7 @@ void PairPODKokkos<DeviceType>::fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_po
{
int totalIterations = l_nrbf4 * Nij;
if (l_nelements==1) {
Kokkos::parallel_for("fourbody_forces1", totalIterations, KOKKOS_LAMBDA(int idx) {
// one iteration per flattened index in [0, totalIterations)
Kokkos::parallel_for("fourbody_forces1", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int j = idx / l_nrbf4; // Derive the original j value
int m = idx % l_nrbf4; // Derive the original m value
int idxU = l_K3 * m + l_K3*l_nrbf3*l_idxi(j);
@ -1151,7 +1151,7 @@ void PairPODKokkos<DeviceType>::fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_po
}
else {
int N3 = Ni * l_nabf4 * l_nrbf4;
Kokkos::parallel_for("fourbody_forces2", totalIterations, KOKKOS_LAMBDA(int idx) {
// one iteration per flattened index in [0, totalIterations)
Kokkos::parallel_for("fourbody_forces2", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int j = idx / l_nrbf4; // Derive the original j value
int m = idx % l_nrbf4; // Derive the original m value
int idxM = j + Nij * m;
@ -1241,7 +1241,7 @@ void PairPODKokkos<DeviceType>::fourbody_forcecoeff(t_pod_1d fb4, t_pod_1d cb4,
{
int totalIterations = l_nrbf4 * Ni;
if (l_nelements==1) {
Kokkos::parallel_for("fourbody_forcecoeff1", totalIterations, KOKKOS_LAMBDA(int idx) {
// one iteration per flattened index in [0, totalIterations)
Kokkos::parallel_for("fourbody_forcecoeff1", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int i = idx / l_nrbf4; // Derive the original i value
int m = idx % l_nrbf4; // Derive the original m value
int idxU = l_K3 * m + l_K3*l_nrbf3*i;
@ -1268,7 +1268,7 @@ void PairPODKokkos<DeviceType>::fourbody_forcecoeff(t_pod_1d fb4, t_pod_1d cb4,
}
else {
int N3 = Ni * l_nabf4 * l_nrbf4;
Kokkos::parallel_for("fourbody_forcecoeff2", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("fourbody_forcecoeff2", Kokkos::RangePolicy<DeviceType>(0,totaliterations), KOKKOS_LAMBDA(int idx) {
int i = idx / l_nrbf4; // Derive the original i value
int m = idx % l_nrbf4; // Derive the original m value
for (int p = 0; p < l_nabf4; p++) {
@ -1311,7 +1311,7 @@ void PairPODKokkos<DeviceType>::allbody_forces(t_pod_1d fij, t_pod_1d l_forcecoe
t_pod_1i l_idxi, t_pod_1i l_tj, int l_nelements, int l_nrbf3, int l_K3, int Nij)
{
int totalIterations = l_nrbf3 * Nij;
Kokkos::parallel_for("allbody_forces", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("allbody_forces", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int j = idx / l_nrbf3; // Calculate j using integer division
int m = idx % l_nrbf3; // Calculate m using modulo operation
int i2 = l_tj(j) - 1;
@ -1346,7 +1346,7 @@ template<class DeviceType>
void PairPODKokkos<DeviceType>::crossdesc(t_pod_1d d12, t_pod_1d d1, t_pod_1d d2, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni)
{
int totalIterations = n12 * Ni;
Kokkos::parallel_for("crossdesc", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("crossdesc", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int n = idx % Ni;
int i = idx / Ni;
@ -1359,7 +1359,7 @@ void PairPODKokkos<DeviceType>::crossdesc_reduction(t_pod_1d cb1, t_pod_1d cb2,
t_pod_1d d2, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni)
{
int totalIterations = n12 * Ni;
Kokkos::parallel_for("crossdesc_reduction", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("crossdesc_reduction", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int n = idx % Ni; // Ni
int m = idx / Ni; // n12
int k1 = ind1(m); // dd1
@ -1375,7 +1375,7 @@ void PairPODKokkos<DeviceType>::crossdesc_reduction(t_pod_1d cb1, t_pod_1d cb2,
// Zero-fill helper: assigns 0.0 to a(i) for every index i in [0, N) using a
// Kokkos parallel_for labelled "initialize_array".
// NOTE(review): the two consecutive parallel_for openers below appear to be the
// before/after lines of a diff whose +/- markers were stripped; presumably only
// the Kokkos::RangePolicy<DeviceType>(0,N) form is live in the real file — confirm.
template<class DeviceType>
void PairPODKokkos<DeviceType>::set_array_to_zero(t_pod_1d a, int N)
{
Kokkos::parallel_for("initialize_array", N, KOKKOS_LAMBDA(int i) {
Kokkos::parallel_for("initialize_array", Kokkos::RangePolicy<DeviceType>(0,N), KOKKOS_LAMBDA(int i) {
a(i) = 0.0;
});
}
@ -1480,7 +1480,7 @@ void PairPODKokkos<DeviceType>::blockatom_base_coefficients(t_pod_1d ei, t_pod_1
int nDes = Mdesc;
int nCoeff = nCoeffPerElement;
Kokkos::parallel_for("atomic_energies", Ni, KOKKOS_LAMBDA(int n) {
Kokkos::parallel_for("atomic_energies", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int n) {
int nc = nCoeff*(tyai[n]-1);
ei[n] = cefs[0 + nc];
for (int m=0; m<nDes; m++)
@ -1488,7 +1488,7 @@ void PairPODKokkos<DeviceType>::blockatom_base_coefficients(t_pod_1d ei, t_pod_1
});
int totalIterations = Ni*nDes;
Kokkos::parallel_for("base_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("base_coefficients", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int n = idx % Ni;
int m = idx / Ni;
int nc = nCoeff*(tyai[n]-1);
@ -1516,7 +1516,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
int nCoeff = nCoeffPerElement;
int totalIterations = Ni*nCom;
Kokkos::parallel_for("pca", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("pca", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int i = idx % Ni;
int k = idx / Ni;
double sum = 0.0;
@ -1528,7 +1528,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
});
totalIterations = Ni*nCls;
Kokkos::parallel_for("inverse_square_distances", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("inverse_square_distances", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int i = idx % Ni;
int j = idx / Ni;
int typei = tyai[i]-1;
@ -1541,14 +1541,14 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
D[i + Ni*j] = 1.0 / sum;
});
Kokkos::parallel_for("Probabilities", Ni, KOKKOS_LAMBDA(int i) {
Kokkos::parallel_for("Probabilities", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int i) {
double sum = 0;
for (int j = 0; j < nCls; j++) sum += D[i + Ni*j];
sumD[i] = sum;
for (int j = 0; j < nCls; j++) P[i + Ni*j] = D[i + Ni*j]/sum;
});
Kokkos::parallel_for("atomic_energies", Ni, KOKKOS_LAMBDA(int n) {
Kokkos::parallel_for("atomic_energies", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int n) {
int nc = nCoeff*(tyai[n]-1);
ei[n] = cefs[0 + nc];
for (int k = 0; k<nCls; k++)
@ -1556,7 +1556,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
ei[n] += cefs[1 + m + nDes*k + nc]*B[n + Ni*m]*P[n + Ni*k];
});
Kokkos::parallel_for("env_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("env_coefficients", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int n = idx % Ni;
int k = idx / Ni;
int nc = nCoeff*(tyai[n]-1);
@ -1567,7 +1567,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
});
totalIterations = Ni*nDes;
Kokkos::parallel_for("base_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("base_coefficients", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int n = idx % Ni;
int m = idx / Ni;
int nc = nCoeff*(tyai[n]-1);
@ -1577,7 +1577,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
cb[n + Ni*m] = sum;
});
Kokkos::parallel_for("base_env_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("base_env_coefficients", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int i = idx % Ni;
int m = idx / Ni;
int typei = tyai[i]-1;
@ -1670,7 +1670,7 @@ template<class DeviceType>
void PairPODKokkos<DeviceType>::tallyforce(t_pod_1d l_fij, t_pod_1i l_ai, t_pod_1i l_aj, int Nij)
{
auto l_f = f;
Kokkos::parallel_for("TallyForce", Nij, KOKKOS_LAMBDA(int n) {
Kokkos::parallel_for("TallyForce", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int n) {
int im = l_ai(n);
int jm = l_aj(n);
int n3 = 3*n;
@ -1694,7 +1694,7 @@ void PairPODKokkos<DeviceType>::tallyenergy(t_pod_1d l_ei, int istart, int Ni)
// For global energy tally
if (eflag_global) {
double local_eng_vdwl = 0.0;
Kokkos::parallel_reduce("GlobalEnergyTally", Ni, KOKKOS_LAMBDA(int k, E_FLOAT& update) {
Kokkos::parallel_reduce("GlobalEnergyTally", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int k, E_FLOAT& update) {
update += l_ei(k);
}, local_eng_vdwl);
@ -1704,7 +1704,7 @@ void PairPODKokkos<DeviceType>::tallyenergy(t_pod_1d l_ei, int istart, int Ni)
// For per-atom energy tally
if (eflag_atom) {
Kokkos::parallel_for("PerAtomEnergyTally", Ni, KOKKOS_LAMBDA(int k) {
Kokkos::parallel_for("PerAtomEnergyTally", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int k) {
l_eatom(istart + k) += l_ei(k);
});
}
@ -1718,7 +1718,7 @@ void PairPODKokkos<DeviceType>::tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_po
if (vflag_global) {
for (int j=0; j<3; j++) {
F_FLOAT sum = 0.0;
Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) {
Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) {
int k3 = 3*k;
update += l_rij(j + k3) * l_fij(j + k3);
}, sum);
@ -1726,21 +1726,21 @@ void PairPODKokkos<DeviceType>::tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_po
}
F_FLOAT sum = 0.0;
Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) {
Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) {
int k3 = 3*k;
update += l_rij(k3) * l_fij(1 + k3);
}, sum);
virial[3] -= sum;
sum = 0.0;
Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) {
Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) {
int k3 = 3*k;
update += l_rij(k3) * l_fij(2 + k3);
}, sum);
virial[4] -= sum;
sum = 0.0;
Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) {
Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) {
int k3 = 3*k;
update += l_rij(1+k3) * l_fij(2+k3);
}, sum);
@ -1748,7 +1748,7 @@ void PairPODKokkos<DeviceType>::tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_po
}
if (vflag_atom) {
Kokkos::parallel_for("PerAtomStressTally", Nij, KOKKOS_LAMBDA(int k) {
Kokkos::parallel_for("PerAtomStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k) {
int i = l_ai(k);
int j = l_aj(k);
int k3 = 3*k;

View File

@ -53,6 +53,8 @@ lmpinstalledpkgs.h
lmpgitversion.h
mliap_model_python_couple.cpp
mliap_model_python_couple.h
# removed in Dec 2024
group_kokkos.cpp
# renamed in September 2024
group_ndx.cpp
group_ndx.h

View File

@ -23,6 +23,7 @@
#include "comm_kokkos.h" // IWYU pragma: export
#include "comm_tiled_kokkos.h" // IWYU pragma: export
#include "domain_kokkos.h" // IWYU pragma: export
#include "group_kokkos.h" // IWYU pragma: export
#include "kokkos.h" // IWYU pragma: export
#include "memory_kokkos.h" // IWYU pragma: export
#include "modify_kokkos.h" // IWYU pragma: export
@ -39,6 +40,7 @@
#include "comm_brick.h"
#include "comm_tiled.h"
#include "domain.h"
#include "group.h"
#include "memory.h"
#include "modify.h"
#include "neighbor.h"
@ -86,6 +88,11 @@ class DomainKokkos : public Domain {
DomainKokkos(class LAMMPS *lmp) : Domain(lmp) {}
};
// Placeholder GroupKokkos type: derives from Group and adds no members of its
// own; the constructor simply forwards the LAMMPS pointer to Group.
// NOTE(review): presumably this is the fallback definition used when the real
// "group_kokkos.h" is not included (build without KOKKOS), mirroring the
// neighbouring DomainKokkos / NeighborKokkos stubs — confirm against the
// surrounding preprocessor guards.
class GroupKokkos : public Group {
public:
GroupKokkos(class LAMMPS *lmp) : Group(lmp) {}
};
class NeighborKokkos : public Neighbor {
public:
NeighborKokkos(class LAMMPS *lmp) : Neighbor(lmp) {}

View File

@ -872,7 +872,9 @@ void LAMMPS::create()
else
atom->create_avec("atomic",0,nullptr,1);
group = new Group(this);
if (kokkos) group = new GroupKokkos(this);
else group = new Group(this);
force = new Force(this); // must be after group, to create temperature
if (kokkos) modify = new ModifyKokkos(this);