Merge pull request #25 from timattox/USER-DPD_GBhacks_cudafix

Fix CUDA runtime issues for USER-DPD Kokkos code.
This commit is contained in:
Tim Mattox
2017-08-09 15:32:03 -04:00
committed by GitHub
6 changed files with 180 additions and 87 deletions

View File

@ -73,11 +73,11 @@ FixShardlowKokkos<DeviceType>::FixShardlowKokkos(LAMMPS *lmp, int narg, char **a
FixShardlow(lmp, narg, arg), k_pairDPDE(NULL), ghostmax(0), nlocal(0) , nghost(0)
{
kokkosable = 1;
// atomKK = (AtomKokkos *) atom;
// execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
// datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK;
// datamask_modify = Q_MASK | X_MASK;
datamask_read = EMPTY_MASK;
datamask_modify = EMPTY_MASK;
if (narg != 3) error->all(FLERR,"Illegal fix shardlow command");
@ -167,6 +167,7 @@ void FixShardlowKokkos<DeviceType>::init()
//FIXME either create cutsq and fill it in, or just point to pairDPD's...
// memory->destroy(cutsq); //FIXME
// memory->create_kokkos(k_cutsq,cutsq,ntypes+1,ntypes+1,"FixShardlowKokkos:cutsq");
k_pairDPDE->k_cutsq.template sync<DeviceType>();
d_cutsq = k_pairDPDE->k_cutsq.template view<DeviceType>(); //FIXME
const double boltz2 = 2.0*force->boltz;
@ -288,10 +289,6 @@ void FixShardlowKokkos<DeviceType>::ssa_update_dpd(
rand_type rand_gen = rand_pool.get_state(id);
#endif
const double theta_ij_inv = 1.0/k_pairDPD->temperature; // independent of i,j
const double boltz_inv = 1.0/force->boltz;
const double ftm2v = force->ftm2v;
const double dt = update->dt;
int ct = count;
int ii = start_ii;
@ -436,7 +433,7 @@ template<bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void FixShardlowKokkos<DeviceType>::ssa_update_dpde(
int start_ii, int count, int id
)
) const
{
#ifdef DPD_USE_RAN_MARS
class RanMars *pRNG = pp_random[id];
@ -444,9 +441,6 @@ void FixShardlowKokkos<DeviceType>::ssa_update_dpde(
rand_type rand_gen = rand_pool.get_state(id);
#endif
const double boltz_inv = 1.0/force->boltz;
const double ftm2v = force->ftm2v;
const double dt = update->dt;
int ct = count;
int ii = start_ii;
@ -639,6 +633,16 @@ void FixShardlowKokkos<DeviceType>::initial_integrate(int vflag)
ssa_gitemLoc = np_ssa->ssa_gitemLoc;
ssa_gitemLen = np_ssa->ssa_gitemLen;
np_ssa->k_ssa_itemLoc.template sync<DeviceType>();
np_ssa->k_ssa_itemLen.template sync<DeviceType>();
np_ssa->k_ssa_gitemLoc.template sync<DeviceType>();
np_ssa->k_ssa_gitemLen.template sync<DeviceType>();
np_ssa->k_ssa_phaseLen.template sync<LMPHostType>();
np_ssa->k_ssa_gphaseLen.template sync<LMPHostType>();
auto h_ssa_phaseLen = np_ssa->k_ssa_phaseLen.h_view;
auto h_ssa_gphaseLen = np_ssa->k_ssa_gphaseLen.h_view;
int maxWorkItemCt = (int) ssa_itemLoc.dimension_1();
if (maxWorkItemCt < (int) ssa_gitemLoc.dimension_1()) {
maxWorkItemCt = (int) ssa_gitemLoc.dimension_1();
@ -670,62 +674,64 @@ void FixShardlowKokkos<DeviceType>::initial_integrate(int vflag)
deep_copy(d_hist, h_hist);
#endif
// process neighbors in the local AIR
for (int workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) {
int workItemCt = ssa_phaseLen[workPhase];
//theta_ij_inv = 1.0/k_pairDPD->temperature; // independent of i,j
boltz_inv = 1.0/force->boltz;
ftm2v = force->ftm2v;
dt = update->dt;
if(atom->ntypes > MAX_TYPES_STACKPARAMS) {
Kokkos::parallel_for(workItemCt, LAMMPS_LAMBDA (const int workItem ) {
int ct = ssa_itemLen(workPhase, workItem);
int ii = ssa_itemLoc(workPhase, workItem);
ssa_update_dpde<false>(ii, ct, workItem);
});
} else {
Kokkos::parallel_for(workItemCt, LAMMPS_LAMBDA (const int workItem ) {
int ct = ssa_itemLen(workPhase, workItem);
int ii = ssa_itemLoc(workPhase, workItem);
ssa_update_dpde<true>(ii, ct, workItem);
});
}
k_params.template sync<DeviceType>();
// process neighbors in the local AIR
atomKK->sync(execution_space,X_MASK | V_MASK | TYPE_MASK | RMASS_MASK | UCOND_MASK | UMECH_MASK | DPDTHETA_MASK);
for (workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) {
int workItemCt = h_ssa_phaseLen[workPhase];
if(atom->ntypes > MAX_TYPES_STACKPARAMS)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDE<false> >(0,workItemCt),*this);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDE<true> >(0,workItemCt),*this);
}
atomKK->modified(execution_space,V_MASK | UCOND_MASK | UMECH_MASK);
//Loop over all 13 outward directions (7 stages)
for (int workPhase = 0; workPhase < ssa_gphaseCt; ++workPhase) {
for (workPhase = 0; workPhase < ssa_gphaseCt; ++workPhase) {
// int airnum = workPhase + 1;
int workItemCt = ssa_gphaseLen[workPhase];
int workItemCt = h_ssa_gphaseLen[workPhase];
// Communicate the updated velocities to all nodes
atomKK->sync(Host,V_MASK);
comm->forward_comm_fix(this);
atomKK->modified(Host,V_MASK);
if(k_pairDPDE){
// Zero out the ghosts' uCond & uMech to be used as delta accumulators
// memset(&(atom->uCond[nlocal]), 0, sizeof(double)*nghost);
// memset(&(atom->uMech[nlocal]), 0, sizeof(double)*nghost);
// must capture local variables, not class variables
atomKK->sync(execution_space,UCOND_MASK | UMECH_MASK);
auto l_uCond = uCond;
auto l_uMech = uMech;
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType>(nlocal,nlocal+nghost), LAMMPS_LAMBDA (const int i) {
uCond(i) = 0.0;
uMech(i) = 0.0;
l_uCond(i) = 0.0;
l_uMech(i) = 0.0;
});
DeviceType::fence();
atomKK->modified(execution_space,UCOND_MASK | UMECH_MASK);
}
// process neighbors in this AIR
if(atom->ntypes > MAX_TYPES_STACKPARAMS) {
Kokkos::parallel_for(workItemCt, LAMMPS_LAMBDA (const int workItem ) {
int ct = ssa_gitemLen(workPhase, workItem);
int ii = ssa_gitemLoc(workPhase, workItem);
ssa_update_dpde<false>(ii, ct, workItem);
});
} else {
Kokkos::parallel_for(workItemCt, LAMMPS_LAMBDA (const int workItem ) {
int ct = ssa_gitemLen(workPhase, workItem);
int ii = ssa_gitemLoc(workPhase, workItem);
ssa_update_dpde<true>(ii, ct, workItem);
});
}
atomKK->sync(execution_space,X_MASK | V_MASK | TYPE_MASK | RMASS_MASK | UCOND_MASK | UMECH_MASK | DPDTHETA_MASK);
if(atom->ntypes > MAX_TYPES_STACKPARAMS)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDEGhost<false> >(0,workItemCt),*this);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDEGhost<true> >(0,workItemCt),*this);
atomKK->modified(execution_space,V_MASK | UCOND_MASK | UMECH_MASK);
// Communicate the ghost deltas to the atom owners
atomKK->sync(Host,V_MASK | UCOND_MASK | UMECH_MASK);
comm->reverse_comm_fix(this);
atomKK->modified(Host,V_MASK | UCOND_MASK | UMECH_MASK);
} //End Loop over all directions For airnum = Top, Top-Right, Right, Bottom-Right, Back
@ -744,6 +750,24 @@ fprintf(stdout, "\n%6d %6d,%6d %6d: "
copymode = 0;
}
// Tagged functor entry point for one work item of the local phase.
// Reads the item's start index (ssa_itemLoc) and its count (ssa_itemLen)
// for the phase currently stored in the workPhase member, then forwards
// them to ssa_update_dpde together with the work item number.
template<class DeviceType>
template<bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void FixShardlowKokkos<DeviceType>::operator()(TagFixShardlowSSAUpdateDPDE<STACKPARAMS>, const int &workItem) const {
  const int firstIndex = ssa_itemLoc(workPhase, workItem);  // start_ii argument
  const int itemCount  = ssa_itemLen(workPhase, workItem);  // count argument
  // workItem is also passed as the id argument of ssa_update_dpde
  ssa_update_dpde<STACKPARAMS>(firstIndex, itemCount, workItem);
}
// Tagged functor entry point for one work item of a ghost phase.
// Same shape as the local-phase variant, but the start index and count
// come from the ghost tables (ssa_gitemLoc / ssa_gitemLen), indexed by
// the workPhase member and the work item number.
template<class DeviceType>
template<bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void FixShardlowKokkos<DeviceType>::operator()(TagFixShardlowSSAUpdateDPDEGhost<STACKPARAMS>, const int &workItem) const {
  const int firstIndex = ssa_gitemLoc(workPhase, workItem);  // start_ii argument
  const int itemCount  = ssa_gitemLen(workPhase, workItem);  // count argument
  // workItem is also passed as the id argument of ssa_update_dpde
  ssa_update_dpde<STACKPARAMS>(firstIndex, itemCount, workItem);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>

View File

@ -30,6 +30,12 @@ FixStyle(shardlow/kk/host,FixShardlowKokkos<LMPHostType>)
namespace LAMMPS_NS {
// Empty tag types used to select which FixShardlowKokkos::operator()
// overload a Kokkos::RangePolicy dispatches to. The bool parameter is
// forwarded as the STACKPARAMS template argument of ssa_update_dpde.

// Selects the overload that reads ssa_itemLoc / ssa_itemLen.
template<bool STACKPARAMS> struct TagFixShardlowSSAUpdateDPDE {};

// Selects the overload that reads ssa_gitemLoc / ssa_gitemLen.
template<bool STACKPARAMS> struct TagFixShardlowSSAUpdateDPDEGhost {};
template<class DeviceType>
class FixShardlowKokkos : public FixShardlow {
public:
@ -60,6 +66,14 @@ class FixShardlowKokkos : public FixShardlow {
F_FLOAT cutinv,halfsigma,kappa,alpha;
};
template<bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void operator()(TagFixShardlowSSAUpdateDPDE<STACKPARAMS>, const int&) const;
template<bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void operator()(TagFixShardlowSSAUpdateDPDEGhost<STACKPARAMS>, const int&) const;
#ifdef DEBUG_PAIR_CT
typename AT::t_int_2d d_counters;
typename HAT::t_int_2d h_counters;
@ -68,6 +82,9 @@ class FixShardlowKokkos : public FixShardlow {
#endif
protected:
int workPhase;
double theta_ij_inv,boltz_inv,ftm2v,dt;
// class PairDPDfdt *pairDPD;
PairDPDfdtEnergyKokkos<DeviceType> *k_pairDPDE;
@ -125,7 +142,7 @@ class FixShardlowKokkos : public FixShardlow {
// void ssa_update_dpd(int, int); // Constant Temperature
template<bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void ssa_update_dpde(int, int, int); // Constant Energy
void ssa_update_dpde(int, int, int) const; // Constant Energy
};

View File

@ -212,8 +212,13 @@ void NBinSSAKokkos<DeviceType>::bin_atoms()
});
DeviceType::fence();
}
k_bins.modify<DeviceType>();
k_bincount.modify<DeviceType>();
c_bins = bins; // bins won't change until the next bin_atoms
k_gbins.modify<DeviceType>();
k_gbincount.modify<DeviceType>();
//now dispose of the k_binID array
k_binID = DAT::tdual_int_1d("NBinSSAKokkos::binID",0);
binID = k_binID.view<DeviceType>();

View File

@ -149,17 +149,21 @@ void NPairSSAKokkos<DeviceType>::copy_stencil_info()
k_ssa_phaseOff = DAT::tdual_int_1d_3("NPairSSAKokkos:ssa_phaseOff",ssa_phaseCt);
ssa_phaseOff = k_ssa_phaseOff.view<DeviceType>();
}
auto h_ssa_phaseOff = k_ssa_phaseOff.h_view;
k_ssa_phaseOff.sync<LMPHostType>();
int workPhase = 0;
for (int zoff = sz1 - 1; zoff >= 0; --zoff) {
for (int yoff = sy1 - 1; yoff >= 0; --yoff) {
for (int xoff = sx1 - 1; xoff >= 0; --xoff) {
ssa_phaseOff(workPhase, 0) = xoff;
ssa_phaseOff(workPhase, 1) = yoff;
ssa_phaseOff(workPhase, 2) = zoff;
h_ssa_phaseOff(workPhase, 0) = xoff;
h_ssa_phaseOff(workPhase, 1) = yoff;
h_ssa_phaseOff(workPhase, 2) = zoff;
workPhase++;
}
}
}
k_ssa_phaseOff.modify<LMPHostType>();
k_ssa_phaseOff.sync<DeviceType>();
}
@ -250,18 +254,33 @@ void NPairSSAKokkos<DeviceType>::build(NeighList *list_)
ssa_itemLen = k_ssa_itemLen.view<DeviceType>();
}
k_ssa_itemLoc.sync<LMPHostType>();
k_ssa_itemLen.sync<LMPHostType>();
k_ssa_gitemLoc.sync<LMPHostType>();
k_ssa_gitemLen.sync<LMPHostType>();
k_ssa_phaseOff.sync<LMPHostType>();
k_ssa_phaseLen.sync<LMPHostType>();
auto h_ssa_itemLoc = k_ssa_itemLoc.h_view;
auto h_ssa_itemLen = k_ssa_itemLen.h_view;
auto h_ssa_gitemLoc = k_ssa_gitemLoc.h_view;
auto h_ssa_gitemLen = k_ssa_gitemLen.h_view;
auto h_ssa_phaseOff = k_ssa_phaseOff.h_view;
auto h_ssa_phaseLen = k_ssa_phaseLen.h_view;
{ // Preflight the neighbor list workplan
const typename ArrayTypes<DeviceType>::t_int_1d_const c_bincount = k_bincount.view<DeviceType>();
const typename ArrayTypes<DeviceType>::t_int_2d_const c_bins = k_bins.view<DeviceType>();
const typename ArrayTypes<DeviceType>::t_int_1d_const_um c_stencil = k_stencil.view<DeviceType>();
const typename ArrayTypes<DeviceType>::t_int_1d_const c_nstencil_ssa = k_nstencil_ssa.view<DeviceType>();
k_bincount.sync<LMPHostType>();
auto h_bincount = k_bincount.h_view;
k_stencil.sync<LMPHostType>();
auto h_stencil = k_stencil.h_view;
k_nstencil_ssa.sync<LMPHostType>();
auto h_nstencil_ssa = k_nstencil_ssa.h_view;
int inum = 0;
// loop over bins with local atoms, counting half of the neighbors
for (int workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) {
int zoff = ssa_phaseOff(workPhase, 2);
int yoff = ssa_phaseOff(workPhase, 1);
int xoff = ssa_phaseOff(workPhase, 0);
int zoff = h_ssa_phaseOff(workPhase, 2);
int yoff = h_ssa_phaseOff(workPhase, 1);
int xoff = h_ssa_phaseOff(workPhase, 0);
int workItem = 0;
for (int zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) {
for (int ybin = lbinylo + yoff - sy1 + 1; ybin < lbinyhi; ybin += sy1) {
@ -276,14 +295,14 @@ void NPairSSAKokkos<DeviceType>::build(NeighList *list_)
if ((s_xbin < lbinxlo) || (s_xbin >= lbinxhi)) continue;
const int ibin = zbin*mbiny*mbinx + s_ybin*mbinx + s_xbin;
const int ibinCt = c_bincount(ibin);
const int ibinCt = h_bincount(ibin);
if (ibinCt > 0) {
int base_n = 0;
bool include_same = false;
// count all local atoms in the current stencil "subphase" as potential neighbors
for (int k = c_nstencil_ssa(subphase); k < c_nstencil_ssa(subphase+1); k++) {
const int jbin = ibin+c_stencil(k);
if (jbin != ibin) base_n += c_bincount(jbin);
for (int k = h_nstencil_ssa(subphase); k < h_nstencil_ssa(subphase+1); k++) {
const int jbin = ibin+h_stencil(k);
if (jbin != ibin) base_n += h_bincount(jbin);
else include_same = true;
}
// Calculate how many ibin particles would have had some neighbors
@ -291,10 +310,10 @@ void NPairSSAKokkos<DeviceType>::build(NeighList *list_)
else if (include_same) inum += ibinCt - 1;
}
}
ssa_itemLoc(workPhase,workItem) = inum_start; // record where workItem starts in ilist
ssa_itemLen(workPhase,workItem) = inum - inum_start; // record workItem length
h_ssa_itemLoc(workPhase,workItem) = inum_start; // record where workItem starts in ilist
h_ssa_itemLen(workPhase,workItem) = inum - inum_start; // record workItem length
#ifdef DEBUG_SSA_BUILD_LOCALS
if (ssa_itemLen(workPhase,workItem) < 0) fprintf(stdout, "undr%03d phase (%3d,%3d) inum %d - inum_start %d UNDERFLOW\n"
if (h_ssa_itemLen(workPhase,workItem) < 0) fprintf(stdout, "undr%03d phase (%3d,%3d) inum %d - inum_start %d UNDERFLOW\n"
,comm->me
,workPhase
,workItem
@ -311,14 +330,14 @@ if (ssa_itemLen(workPhase,workItem) < 0) fprintf(stdout, "undr%03d phase (%3d,%3
fprintf(stdout, "phas%03d phase %3d could use %6d inums, expected %6d inums. maxworkItems = %3d, inums/workItems = %g\n"
,comm->me
,workPhase
,inum - ssa_itemLoc(workPhase, 0)
,inum - h_ssa_itemLoc(workPhase, 0)
,(nlocal*4 + ssa_phaseCt - 1) / ssa_phaseCt
,workItem
,(inum - ssa_itemLoc(workPhase, 0)) / (double) workItem
,(inum - h_ssa_itemLoc(workPhase, 0)) / (double) workItem
);
#endif
// record where workPhase ends
ssa_phaseLen(workPhase) = workItem;
h_ssa_phaseLen(workPhase) = workItem;
}
#ifdef DEBUG_SSA_BUILD_LOCALS
fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inums/phase = %g\n"
@ -331,15 +350,30 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu
#endif
nl_size = inum; // record how much space is needed for the local work plan
}
// count how many ghosts might have neighbors, and increase the work plan storage
k_gbincount.sync<LMPHostType>();
for (int workPhase = 0; workPhase < ssa_gphaseCt; workPhase++) {
int len = k_gbincount.h_view(workPhase + 1);
ssa_gitemLoc(workPhase,0) = nl_size; // record where workItem starts in ilist
ssa_gitemLen(workPhase,0) = len;
h_ssa_gitemLoc(workPhase,0) = nl_size; // record where workItem starts in ilist
h_ssa_gitemLen(workPhase,0) = len;
nl_size += len;
}
list->grow(nl_size); // Make special larger SSA neighbor list
k_ssa_itemLoc.modify<LMPHostType>();
k_ssa_itemLen.modify<LMPHostType>();
k_ssa_gitemLoc.modify<LMPHostType>();
k_ssa_gitemLen.modify<LMPHostType>();
k_ssa_phaseLen.modify<LMPHostType>();
k_ssa_itemLoc.sync<DeviceType>();
k_ssa_itemLen.sync<DeviceType>();
k_ssa_gitemLen.sync<DeviceType>();
k_ssa_gitemLoc.sync<DeviceType>();
k_ssa_phaseOff.sync<DeviceType>();
k_ssa_phaseLen.sync<DeviceType>();
k_ssa_gphaseLen.sync<DeviceType>();
NPairSSAKokkosExecute<DeviceType>
data(*list,
k_cutneighsq.view<DeviceType>(),
@ -422,15 +456,28 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu
Kokkos::parallel_for(ssa_phaseCt, LAMMPS_LAMBDA (const int workPhase) {
data.build_locals_onePhase(firstTry, comm->me, workPhase);
});
data.neigh_list.inum = ssa_itemLoc(ssa_phaseCt-1,ssa_phaseLen(ssa_phaseCt-1)-1) +
ssa_itemLen(ssa_phaseCt-1,ssa_phaseLen(ssa_phaseCt-1)-1);
k_ssa_itemLoc.modify<DeviceType>();
k_ssa_itemLen.modify<DeviceType>();
k_ssa_phaseLen.modify<DeviceType>();
k_ssa_itemLoc.sync<LMPHostType>();
k_ssa_itemLen.sync<LMPHostType>();
k_ssa_phaseLen.sync<LMPHostType>();
data.neigh_list.inum = h_ssa_itemLoc(ssa_phaseCt-1,h_ssa_phaseLen(ssa_phaseCt-1)-1) +
h_ssa_itemLen(ssa_phaseCt-1,h_ssa_phaseLen(ssa_phaseCt-1)-1);
// loop over AIR ghost atoms, storing their local neighbors
Kokkos::parallel_for(ssa_gphaseCt, LAMMPS_LAMBDA (const int workPhase) {
data.build_ghosts_onePhase(workPhase);
});
data.neigh_list.gnum = ssa_gitemLoc(ssa_gphaseCt-1,ssa_gphaseLen(ssa_gphaseCt-1)-1) +
ssa_gitemLen(ssa_gphaseCt-1,ssa_gphaseLen(ssa_gphaseCt-1)-1) - data.neigh_list.inum;
k_ssa_gitemLoc.modify<DeviceType>();
k_ssa_gitemLen.modify<DeviceType>();
k_ssa_gphaseLen.modify<DeviceType>();
k_ssa_gitemLoc.sync<LMPHostType>();
k_ssa_gitemLen.sync<LMPHostType>();
k_ssa_gphaseLen.sync<LMPHostType>();
auto h_ssa_gphaseLen = k_ssa_gphaseLen.h_view;
data.neigh_list.gnum = h_ssa_gitemLoc(ssa_gphaseCt-1,h_ssa_gphaseLen(ssa_gphaseCt-1)-1) +
h_ssa_gitemLen(ssa_gphaseCt-1,h_ssa_gphaseLen(ssa_gphaseCt-1)-1) - data.neigh_list.inum;
firstTry = false;
DeviceType::fence();
@ -445,12 +492,12 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu
}
}
k_ssa_phaseLen.modify<DeviceType>();
k_ssa_itemLoc.modify<DeviceType>();
k_ssa_itemLen.modify<DeviceType>();
k_ssa_gphaseLen.modify<DeviceType>();
k_ssa_gitemLoc.modify<DeviceType>();
k_ssa_gitemLen.modify<DeviceType>();
//k_ssa_phaseLen.modify<DeviceType>();
//k_ssa_itemLoc.modify<DeviceType>();
//k_ssa_itemLen.modify<DeviceType>();
//k_ssa_gphaseLen.modify<DeviceType>();
//k_ssa_gitemLoc.modify<DeviceType>();
//k_ssa_gitemLen.modify<DeviceType>();
list->inum = data.neigh_list.inum; //FIXME once the above is in a parallel_for
list->gnum = data.neigh_list.gnum; // it will need a deep_copy or something

View File

@ -426,7 +426,7 @@ KOKKOS_INLINE_FUNCTION
void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
{
const bool one_type = (atom->ntypes == 1);
const bool one_type = (ntypes == 1);
if (isite1 == isite2)
if (one_type)
this->vectorized_operator<NEIGHFLAG,NEWTON_PAIR,EVFLAG,true, true, true>(ii, ev);
@ -797,7 +797,7 @@ KOKKOS_INLINE_FUNCTION
void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxComputeNoAtomics<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
{
const bool one_type = (atom->ntypes == 1);
const bool one_type = (ntypes == 1);
if (isite1 == isite2)
if (one_type)
this->vectorized_operator<NEIGHFLAG,NEWTON_PAIR,EVFLAG,true, false, true>(ii, ev);
@ -1653,18 +1653,18 @@ template<class DeviceType>
void PairExp6rxKokkos<DeviceType>::allocate()
{
allocated = 1;
int n = atom->ntypes;
ntypes = atom->ntypes;
memory->create(setflag,n+1,n+1,"pair:setflag");
for (int i = 1; i <= n; i++)
for (int j = i; j <= n; j++)
memory->create(setflag,ntypes+1,ntypes+1,"pair:setflag");
for (int i = 1; i <= ntypes; i++)
for (int j = i; j <= ntypes; j++)
setflag[i][j] = 0;
memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
memory->create_kokkos(k_cutsq,cutsq,ntypes+1,ntypes+1,"pair:cutsq");
d_cutsq = k_cutsq.template view<DeviceType>();
k_cutsq.template modify<LMPHostType>();
memory->create(cut,n+1,n+1,"pair:cut_lj");
memory->create(cut,ntypes+1,ntypes+1,"pair:cut_lj");
}

View File

@ -145,7 +145,7 @@ class PairExp6rxKokkos : public PairExp6rx {
int eflag,vflag;
int nlocal,newton_pair,neighflag;
double special_lj[4];
int num_threads;
int num_threads,ntypes;
typename AT::t_x_array_randomread x;
typename AT::t_f_array f;