Merge pull request #25 from timattox/USER-DPD_GBhacks_cudafix
Fix CUDA runtime issues for USER-DPD Kokkos code.
This commit is contained in:
@ -73,11 +73,11 @@ FixShardlowKokkos<DeviceType>::FixShardlowKokkos(LAMMPS *lmp, int narg, char **a
|
||||
FixShardlow(lmp, narg, arg), k_pairDPDE(NULL), ghostmax(0), nlocal(0) , nghost(0)
|
||||
{
|
||||
kokkosable = 1;
|
||||
// atomKK = (AtomKokkos *) atom;
|
||||
// execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
// datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK;
|
||||
// datamask_modify = Q_MASK | X_MASK;
|
||||
datamask_read = EMPTY_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
|
||||
if (narg != 3) error->all(FLERR,"Illegal fix shardlow command");
|
||||
|
||||
@ -167,6 +167,7 @@ void FixShardlowKokkos<DeviceType>::init()
|
||||
//FIXME either create cutsq and fill it in, or just point to pairDPD's...
|
||||
// memory->destroy(cutsq); //FIXME
|
||||
// memory->create_kokkos(k_cutsq,cutsq,ntypes+1,ntypes+1,"FixShardlowKokkos:cutsq");
|
||||
k_pairDPDE->k_cutsq.template sync<DeviceType>();
|
||||
d_cutsq = k_pairDPDE->k_cutsq.template view<DeviceType>(); //FIXME
|
||||
|
||||
const double boltz2 = 2.0*force->boltz;
|
||||
@ -288,10 +289,6 @@ void FixShardlowKokkos<DeviceType>::ssa_update_dpd(
|
||||
rand_type rand_gen = rand_pool.get_state(id);
|
||||
#endif
|
||||
|
||||
const double theta_ij_inv = 1.0/k_pairDPD->temperature; // independent of i,j
|
||||
const double boltz_inv = 1.0/force->boltz;
|
||||
const double ftm2v = force->ftm2v;
|
||||
const double dt = update->dt;
|
||||
int ct = count;
|
||||
int ii = start_ii;
|
||||
|
||||
@ -436,7 +433,7 @@ template<bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixShardlowKokkos<DeviceType>::ssa_update_dpde(
|
||||
int start_ii, int count, int id
|
||||
)
|
||||
) const
|
||||
{
|
||||
#ifdef DPD_USE_RAN_MARS
|
||||
class RanMars *pRNG = pp_random[id];
|
||||
@ -444,9 +441,6 @@ void FixShardlowKokkos<DeviceType>::ssa_update_dpde(
|
||||
rand_type rand_gen = rand_pool.get_state(id);
|
||||
#endif
|
||||
|
||||
const double boltz_inv = 1.0/force->boltz;
|
||||
const double ftm2v = force->ftm2v;
|
||||
const double dt = update->dt;
|
||||
int ct = count;
|
||||
int ii = start_ii;
|
||||
|
||||
@ -639,6 +633,16 @@ void FixShardlowKokkos<DeviceType>::initial_integrate(int vflag)
|
||||
ssa_gitemLoc = np_ssa->ssa_gitemLoc;
|
||||
ssa_gitemLen = np_ssa->ssa_gitemLen;
|
||||
|
||||
np_ssa->k_ssa_itemLoc.template sync<DeviceType>();
|
||||
np_ssa->k_ssa_itemLen.template sync<DeviceType>();
|
||||
np_ssa->k_ssa_gitemLoc.template sync<DeviceType>();
|
||||
np_ssa->k_ssa_gitemLen.template sync<DeviceType>();
|
||||
|
||||
np_ssa->k_ssa_phaseLen.template sync<LMPHostType>();
|
||||
np_ssa->k_ssa_gphaseLen.template sync<LMPHostType>();
|
||||
auto h_ssa_phaseLen = np_ssa->k_ssa_phaseLen.h_view;
|
||||
auto h_ssa_gphaseLen = np_ssa->k_ssa_gphaseLen.h_view;
|
||||
|
||||
int maxWorkItemCt = (int) ssa_itemLoc.dimension_1();
|
||||
if (maxWorkItemCt < (int) ssa_gitemLoc.dimension_1()) {
|
||||
maxWorkItemCt = (int) ssa_gitemLoc.dimension_1();
|
||||
@ -670,62 +674,64 @@ void FixShardlowKokkos<DeviceType>::initial_integrate(int vflag)
|
||||
deep_copy(d_hist, h_hist);
|
||||
#endif
|
||||
|
||||
// process neighbors in the local AIR
|
||||
for (int workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) {
|
||||
int workItemCt = ssa_phaseLen[workPhase];
|
||||
//theta_ij_inv = 1.0/k_pairDPD->temperature; // independent of i,j
|
||||
boltz_inv = 1.0/force->boltz;
|
||||
ftm2v = force->ftm2v;
|
||||
dt = update->dt;
|
||||
|
||||
if(atom->ntypes > MAX_TYPES_STACKPARAMS) {
|
||||
Kokkos::parallel_for(workItemCt, LAMMPS_LAMBDA (const int workItem ) {
|
||||
int ct = ssa_itemLen(workPhase, workItem);
|
||||
int ii = ssa_itemLoc(workPhase, workItem);
|
||||
ssa_update_dpde<false>(ii, ct, workItem);
|
||||
});
|
||||
} else {
|
||||
Kokkos::parallel_for(workItemCt, LAMMPS_LAMBDA (const int workItem ) {
|
||||
int ct = ssa_itemLen(workPhase, workItem);
|
||||
int ii = ssa_itemLoc(workPhase, workItem);
|
||||
ssa_update_dpde<true>(ii, ct, workItem);
|
||||
});
|
||||
}
|
||||
k_params.template sync<DeviceType>();
|
||||
|
||||
// process neighbors in the local AIR
|
||||
atomKK->sync(execution_space,X_MASK | V_MASK | TYPE_MASK | RMASS_MASK | UCOND_MASK | UMECH_MASK | DPDTHETA_MASK);
|
||||
for (workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) {
|
||||
int workItemCt = h_ssa_phaseLen[workPhase];
|
||||
|
||||
if(atom->ntypes > MAX_TYPES_STACKPARAMS)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDE<false> >(0,workItemCt),*this);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDE<true> >(0,workItemCt),*this);
|
||||
}
|
||||
atomKK->modified(execution_space,V_MASK | UCOND_MASK | UMECH_MASK);
|
||||
|
||||
//Loop over all 13 outward directions (7 stages)
|
||||
for (int workPhase = 0; workPhase < ssa_gphaseCt; ++workPhase) {
|
||||
for (workPhase = 0; workPhase < ssa_gphaseCt; ++workPhase) {
|
||||
// int airnum = workPhase + 1;
|
||||
int workItemCt = ssa_gphaseLen[workPhase];
|
||||
int workItemCt = h_ssa_gphaseLen[workPhase];
|
||||
|
||||
// Communicate the updated velocities to all nodes
|
||||
atomKK->sync(Host,V_MASK);
|
||||
comm->forward_comm_fix(this);
|
||||
atomKK->modified(Host,V_MASK);
|
||||
|
||||
if(k_pairDPDE){
|
||||
// Zero out the ghosts' uCond & uMech to be used as delta accumulators
|
||||
// memset(&(atom->uCond[nlocal]), 0, sizeof(double)*nghost);
|
||||
// memset(&(atom->uMech[nlocal]), 0, sizeof(double)*nghost);
|
||||
|
||||
// must capture local variables, not class variables
|
||||
atomKK->sync(execution_space,UCOND_MASK | UMECH_MASK);
|
||||
auto l_uCond = uCond;
|
||||
auto l_uMech = uMech;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType>(nlocal,nlocal+nghost), LAMMPS_LAMBDA (const int i) {
|
||||
uCond(i) = 0.0;
|
||||
uMech(i) = 0.0;
|
||||
l_uCond(i) = 0.0;
|
||||
l_uMech(i) = 0.0;
|
||||
});
|
||||
DeviceType::fence();
|
||||
atomKK->modified(execution_space,UCOND_MASK | UMECH_MASK);
|
||||
}
|
||||
|
||||
// process neighbors in this AIR
|
||||
if(atom->ntypes > MAX_TYPES_STACKPARAMS) {
|
||||
Kokkos::parallel_for(workItemCt, LAMMPS_LAMBDA (const int workItem ) {
|
||||
int ct = ssa_gitemLen(workPhase, workItem);
|
||||
int ii = ssa_gitemLoc(workPhase, workItem);
|
||||
ssa_update_dpde<false>(ii, ct, workItem);
|
||||
});
|
||||
} else {
|
||||
Kokkos::parallel_for(workItemCt, LAMMPS_LAMBDA (const int workItem ) {
|
||||
int ct = ssa_gitemLen(workPhase, workItem);
|
||||
int ii = ssa_gitemLoc(workPhase, workItem);
|
||||
ssa_update_dpde<true>(ii, ct, workItem);
|
||||
});
|
||||
}
|
||||
atomKK->sync(execution_space,X_MASK | V_MASK | TYPE_MASK | RMASS_MASK | UCOND_MASK | UMECH_MASK | DPDTHETA_MASK);
|
||||
if(atom->ntypes > MAX_TYPES_STACKPARAMS)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDEGhost<false> >(0,workItemCt),*this);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDEGhost<true> >(0,workItemCt),*this);
|
||||
atomKK->modified(execution_space,V_MASK | UCOND_MASK | UMECH_MASK);
|
||||
|
||||
// Communicate the ghost deltas to the atom owners
|
||||
atomKK->sync(Host,V_MASK | UCOND_MASK | UMECH_MASK);
|
||||
comm->reverse_comm_fix(this);
|
||||
atomKK->modified(Host,V_MASK | UCOND_MASK | UMECH_MASK);
|
||||
|
||||
} //End Loop over all directions For airnum = Top, Top-Right, Right, Bottom-Right, Back
|
||||
|
||||
@ -744,6 +750,24 @@ fprintf(stdout, "\n%6d %6d,%6d %6d: "
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixShardlowKokkos<DeviceType>::operator()(TagFixShardlowSSAUpdateDPDE<STACKPARAMS>, const int &workItem) const {
|
||||
const int ct = ssa_itemLen(workPhase, workItem);
|
||||
const int ii = ssa_itemLoc(workPhase, workItem);
|
||||
ssa_update_dpde<STACKPARAMS>(ii, ct, workItem);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixShardlowKokkos<DeviceType>::operator()(TagFixShardlowSSAUpdateDPDEGhost<STACKPARAMS>, const int &workItem) const {
|
||||
const int ct = ssa_gitemLen(workPhase, workItem);
|
||||
const int ii = ssa_gitemLoc(workPhase, workItem);
|
||||
ssa_update_dpde<STACKPARAMS>(ii, ct, workItem);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
|
||||
@ -30,6 +30,12 @@ FixStyle(shardlow/kk/host,FixShardlowKokkos<LMPHostType>)
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Empty dispatch tag: used as the work tag of a Kokkos::RangePolicy so that
// the FixShardlowKokkos::operator() overload for local SSA phases is chosen.
// STACKPARAMS is carried in the type and forwarded to ssa_update_dpde.
template<bool STACKPARAMS>
struct TagFixShardlowSSAUpdateDPDE {
};
|
||||
|
||||
// Empty dispatch tag: used as the work tag of a Kokkos::RangePolicy so that
// the FixShardlowKokkos::operator() overload for ghost SSA phases is chosen.
// STACKPARAMS is carried in the type and forwarded to ssa_update_dpde.
template<bool STACKPARAMS>
struct TagFixShardlowSSAUpdateDPDEGhost {
};
|
||||
|
||||
template<class DeviceType>
|
||||
class FixShardlowKokkos : public FixShardlow {
|
||||
public:
|
||||
@ -60,6 +66,14 @@ class FixShardlowKokkos : public FixShardlow {
|
||||
F_FLOAT cutinv,halfsigma,kappa,alpha;
|
||||
};
|
||||
|
||||
template<bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagFixShardlowSSAUpdateDPDE<STACKPARAMS>, const int&) const;
|
||||
|
||||
template<bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagFixShardlowSSAUpdateDPDEGhost<STACKPARAMS>, const int&) const;
|
||||
|
||||
#ifdef DEBUG_PAIR_CT
|
||||
typename AT::t_int_2d d_counters;
|
||||
typename HAT::t_int_2d h_counters;
|
||||
@ -68,6 +82,9 @@ class FixShardlowKokkos : public FixShardlow {
|
||||
#endif
|
||||
|
||||
protected:
|
||||
int workPhase;
|
||||
double theta_ij_inv,boltz_inv,ftm2v,dt;
|
||||
|
||||
// class PairDPDfdt *pairDPD;
|
||||
PairDPDfdtEnergyKokkos<DeviceType> *k_pairDPDE;
|
||||
|
||||
@ -125,7 +142,7 @@ class FixShardlowKokkos : public FixShardlow {
|
||||
// void ssa_update_dpd(int, int); // Constant Temperature
|
||||
template<bool STACKPARAMS>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void ssa_update_dpde(int, int, int); // Constant Energy
|
||||
void ssa_update_dpde(int, int, int) const; // Constant Energy
|
||||
|
||||
};
|
||||
|
||||
|
||||
@ -212,8 +212,13 @@ void NBinSSAKokkos<DeviceType>::bin_atoms()
|
||||
});
|
||||
DeviceType::fence();
|
||||
}
|
||||
k_bins.modify<DeviceType>();
|
||||
k_bincount.modify<DeviceType>();
|
||||
c_bins = bins; // bins won't change until the next bin_atoms
|
||||
|
||||
k_gbins.modify<DeviceType>();
|
||||
k_gbincount.modify<DeviceType>();
|
||||
|
||||
//now dispose of the k_binID array
|
||||
k_binID = DAT::tdual_int_1d("NBinSSAKokkos::binID",0);
|
||||
binID = k_binID.view<DeviceType>();
|
||||
|
||||
@ -149,17 +149,21 @@ void NPairSSAKokkos<DeviceType>::copy_stencil_info()
|
||||
k_ssa_phaseOff = DAT::tdual_int_1d_3("NPairSSAKokkos:ssa_phaseOff",ssa_phaseCt);
|
||||
ssa_phaseOff = k_ssa_phaseOff.view<DeviceType>();
|
||||
}
|
||||
auto h_ssa_phaseOff = k_ssa_phaseOff.h_view;
|
||||
k_ssa_phaseOff.sync<LMPHostType>();
|
||||
int workPhase = 0;
|
||||
for (int zoff = sz1 - 1; zoff >= 0; --zoff) {
|
||||
for (int yoff = sy1 - 1; yoff >= 0; --yoff) {
|
||||
for (int xoff = sx1 - 1; xoff >= 0; --xoff) {
|
||||
ssa_phaseOff(workPhase, 0) = xoff;
|
||||
ssa_phaseOff(workPhase, 1) = yoff;
|
||||
ssa_phaseOff(workPhase, 2) = zoff;
|
||||
h_ssa_phaseOff(workPhase, 0) = xoff;
|
||||
h_ssa_phaseOff(workPhase, 1) = yoff;
|
||||
h_ssa_phaseOff(workPhase, 2) = zoff;
|
||||
workPhase++;
|
||||
}
|
||||
}
|
||||
}
|
||||
k_ssa_phaseOff.modify<LMPHostType>();
|
||||
k_ssa_phaseOff.sync<DeviceType>();
|
||||
|
||||
}
|
||||
|
||||
@ -250,18 +254,33 @@ void NPairSSAKokkos<DeviceType>::build(NeighList *list_)
|
||||
ssa_itemLen = k_ssa_itemLen.view<DeviceType>();
|
||||
}
|
||||
|
||||
k_ssa_itemLoc.sync<LMPHostType>();
|
||||
k_ssa_itemLen.sync<LMPHostType>();
|
||||
k_ssa_gitemLoc.sync<LMPHostType>();
|
||||
k_ssa_gitemLen.sync<LMPHostType>();
|
||||
k_ssa_phaseOff.sync<LMPHostType>();
|
||||
k_ssa_phaseLen.sync<LMPHostType>();
|
||||
auto h_ssa_itemLoc = k_ssa_itemLoc.h_view;
|
||||
auto h_ssa_itemLen = k_ssa_itemLen.h_view;
|
||||
auto h_ssa_gitemLoc = k_ssa_gitemLoc.h_view;
|
||||
auto h_ssa_gitemLen = k_ssa_gitemLen.h_view;
|
||||
auto h_ssa_phaseOff = k_ssa_phaseOff.h_view;
|
||||
auto h_ssa_phaseLen = k_ssa_phaseLen.h_view;
|
||||
|
||||
{ // Preflight the neighbor list workplan
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d_const c_bincount = k_bincount.view<DeviceType>();
|
||||
const typename ArrayTypes<DeviceType>::t_int_2d_const c_bins = k_bins.view<DeviceType>();
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d_const_um c_stencil = k_stencil.view<DeviceType>();
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d_const c_nstencil_ssa = k_nstencil_ssa.view<DeviceType>();
|
||||
k_bincount.sync<LMPHostType>();
|
||||
auto h_bincount = k_bincount.h_view;
|
||||
k_stencil.sync<LMPHostType>();
|
||||
auto h_stencil = k_stencil.h_view;
|
||||
k_nstencil_ssa.sync<LMPHostType>();
|
||||
auto h_nstencil_ssa = k_nstencil_ssa.h_view;
|
||||
int inum = 0;
|
||||
|
||||
// loop over bins with local atoms, counting half of the neighbors
|
||||
for (int workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) {
|
||||
int zoff = ssa_phaseOff(workPhase, 2);
|
||||
int yoff = ssa_phaseOff(workPhase, 1);
|
||||
int xoff = ssa_phaseOff(workPhase, 0);
|
||||
int zoff = h_ssa_phaseOff(workPhase, 2);
|
||||
int yoff = h_ssa_phaseOff(workPhase, 1);
|
||||
int xoff = h_ssa_phaseOff(workPhase, 0);
|
||||
int workItem = 0;
|
||||
for (int zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) {
|
||||
for (int ybin = lbinylo + yoff - sy1 + 1; ybin < lbinyhi; ybin += sy1) {
|
||||
@ -276,14 +295,14 @@ void NPairSSAKokkos<DeviceType>::build(NeighList *list_)
|
||||
if ((s_xbin < lbinxlo) || (s_xbin >= lbinxhi)) continue;
|
||||
|
||||
const int ibin = zbin*mbiny*mbinx + s_ybin*mbinx + s_xbin;
|
||||
const int ibinCt = c_bincount(ibin);
|
||||
const int ibinCt = h_bincount(ibin);
|
||||
if (ibinCt > 0) {
|
||||
int base_n = 0;
|
||||
bool include_same = false;
|
||||
// count all local atoms in the current stencil "subphase" as potential neighbors
|
||||
for (int k = c_nstencil_ssa(subphase); k < c_nstencil_ssa(subphase+1); k++) {
|
||||
const int jbin = ibin+c_stencil(k);
|
||||
if (jbin != ibin) base_n += c_bincount(jbin);
|
||||
for (int k = h_nstencil_ssa(subphase); k < h_nstencil_ssa(subphase+1); k++) {
|
||||
const int jbin = ibin+h_stencil(k);
|
||||
if (jbin != ibin) base_n += h_bincount(jbin);
|
||||
else include_same = true;
|
||||
}
|
||||
// Calculate how many ibin particles would have had some neighbors
|
||||
@ -291,10 +310,10 @@ void NPairSSAKokkos<DeviceType>::build(NeighList *list_)
|
||||
else if (include_same) inum += ibinCt - 1;
|
||||
}
|
||||
}
|
||||
ssa_itemLoc(workPhase,workItem) = inum_start; // record where workItem starts in ilist
|
||||
ssa_itemLen(workPhase,workItem) = inum - inum_start; // record workItem length
|
||||
h_ssa_itemLoc(workPhase,workItem) = inum_start; // record where workItem starts in ilist
|
||||
h_ssa_itemLen(workPhase,workItem) = inum - inum_start; // record workItem length
|
||||
#ifdef DEBUG_SSA_BUILD_LOCALS
|
||||
if (ssa_itemLen(workPhase,workItem) < 0) fprintf(stdout, "undr%03d phase (%3d,%3d) inum %d - inum_start %d UNDERFLOW\n"
|
||||
if (h_ssa_itemLen(workPhase,workItem) < 0) fprintf(stdout, "undr%03d phase (%3d,%3d) inum %d - inum_start %d UNDERFLOW\n"
|
||||
,comm->me
|
||||
,workPhase
|
||||
,workItem
|
||||
@ -311,14 +330,14 @@ if (ssa_itemLen(workPhase,workItem) < 0) fprintf(stdout, "undr%03d phase (%3d,%3
|
||||
fprintf(stdout, "phas%03d phase %3d could use %6d inums, expected %6d inums. maxworkItems = %3d, inums/workItems = %g\n"
|
||||
,comm->me
|
||||
,workPhase
|
||||
,inum - ssa_itemLoc(workPhase, 0)
|
||||
,inum - h_ssa_itemLoc(workPhase, 0)
|
||||
,(nlocal*4 + ssa_phaseCt - 1) / ssa_phaseCt
|
||||
,workItem
|
||||
,(inum - ssa_itemLoc(workPhase, 0)) / (double) workItem
|
||||
,(inum - h_ssa_itemLoc(workPhase, 0)) / (double) workItem
|
||||
);
|
||||
#endif
|
||||
// record where workPhase ends
|
||||
ssa_phaseLen(workPhase) = workItem;
|
||||
h_ssa_phaseLen(workPhase) = workItem;
|
||||
}
|
||||
#ifdef DEBUG_SSA_BUILD_LOCALS
|
||||
fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inums/phase = %g\n"
|
||||
@ -331,15 +350,30 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu
|
||||
#endif
|
||||
nl_size = inum; // record how much space is needed for the local work plan
|
||||
}
|
||||
|
||||
// count how many ghosts might have neighbors, and increase the work plan storage
|
||||
k_gbincount.sync<LMPHostType>();
|
||||
for (int workPhase = 0; workPhase < ssa_gphaseCt; workPhase++) {
|
||||
int len = k_gbincount.h_view(workPhase + 1);
|
||||
ssa_gitemLoc(workPhase,0) = nl_size; // record where workItem starts in ilist
|
||||
ssa_gitemLen(workPhase,0) = len;
|
||||
h_ssa_gitemLoc(workPhase,0) = nl_size; // record where workItem starts in ilist
|
||||
h_ssa_gitemLen(workPhase,0) = len;
|
||||
nl_size += len;
|
||||
}
|
||||
list->grow(nl_size); // Make special larger SSA neighbor list
|
||||
|
||||
k_ssa_itemLoc.modify<LMPHostType>();
|
||||
k_ssa_itemLen.modify<LMPHostType>();
|
||||
k_ssa_gitemLoc.modify<LMPHostType>();
|
||||
k_ssa_gitemLen.modify<LMPHostType>();
|
||||
k_ssa_phaseLen.modify<LMPHostType>();
|
||||
k_ssa_itemLoc.sync<DeviceType>();
|
||||
k_ssa_itemLen.sync<DeviceType>();
|
||||
k_ssa_gitemLen.sync<DeviceType>();
|
||||
k_ssa_gitemLoc.sync<DeviceType>();
|
||||
k_ssa_phaseOff.sync<DeviceType>();
|
||||
k_ssa_phaseLen.sync<DeviceType>();
|
||||
k_ssa_gphaseLen.sync<DeviceType>();
|
||||
|
||||
NPairSSAKokkosExecute<DeviceType>
|
||||
data(*list,
|
||||
k_cutneighsq.view<DeviceType>(),
|
||||
@ -422,15 +456,28 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu
|
||||
Kokkos::parallel_for(ssa_phaseCt, LAMMPS_LAMBDA (const int workPhase) {
|
||||
data.build_locals_onePhase(firstTry, comm->me, workPhase);
|
||||
});
|
||||
data.neigh_list.inum = ssa_itemLoc(ssa_phaseCt-1,ssa_phaseLen(ssa_phaseCt-1)-1) +
|
||||
ssa_itemLen(ssa_phaseCt-1,ssa_phaseLen(ssa_phaseCt-1)-1);
|
||||
k_ssa_itemLoc.modify<DeviceType>();
|
||||
k_ssa_itemLen.modify<DeviceType>();
|
||||
k_ssa_phaseLen.modify<DeviceType>();
|
||||
k_ssa_itemLoc.sync<LMPHostType>();
|
||||
k_ssa_itemLen.sync<LMPHostType>();
|
||||
k_ssa_phaseLen.sync<LMPHostType>();
|
||||
data.neigh_list.inum = h_ssa_itemLoc(ssa_phaseCt-1,h_ssa_phaseLen(ssa_phaseCt-1)-1) +
|
||||
h_ssa_itemLen(ssa_phaseCt-1,h_ssa_phaseLen(ssa_phaseCt-1)-1);
|
||||
|
||||
// loop over AIR ghost atoms, storing their local neighbors
|
||||
Kokkos::parallel_for(ssa_gphaseCt, LAMMPS_LAMBDA (const int workPhase) {
|
||||
data.build_ghosts_onePhase(workPhase);
|
||||
});
|
||||
data.neigh_list.gnum = ssa_gitemLoc(ssa_gphaseCt-1,ssa_gphaseLen(ssa_gphaseCt-1)-1) +
|
||||
ssa_gitemLen(ssa_gphaseCt-1,ssa_gphaseLen(ssa_gphaseCt-1)-1) - data.neigh_list.inum;
|
||||
k_ssa_gitemLoc.modify<DeviceType>();
|
||||
k_ssa_gitemLen.modify<DeviceType>();
|
||||
k_ssa_gphaseLen.modify<DeviceType>();
|
||||
k_ssa_gitemLoc.sync<LMPHostType>();
|
||||
k_ssa_gitemLen.sync<LMPHostType>();
|
||||
k_ssa_gphaseLen.sync<LMPHostType>();
|
||||
auto h_ssa_gphaseLen = k_ssa_gphaseLen.h_view;
|
||||
data.neigh_list.gnum = h_ssa_gitemLoc(ssa_gphaseCt-1,h_ssa_gphaseLen(ssa_gphaseCt-1)-1) +
|
||||
h_ssa_gitemLen(ssa_gphaseCt-1,h_ssa_gphaseLen(ssa_gphaseCt-1)-1) - data.neigh_list.inum;
|
||||
firstTry = false;
|
||||
|
||||
DeviceType::fence();
|
||||
@ -445,12 +492,12 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu
|
||||
}
|
||||
}
|
||||
|
||||
k_ssa_phaseLen.modify<DeviceType>();
|
||||
k_ssa_itemLoc.modify<DeviceType>();
|
||||
k_ssa_itemLen.modify<DeviceType>();
|
||||
k_ssa_gphaseLen.modify<DeviceType>();
|
||||
k_ssa_gitemLoc.modify<DeviceType>();
|
||||
k_ssa_gitemLen.modify<DeviceType>();
|
||||
//k_ssa_phaseLen.modify<DeviceType>();
|
||||
//k_ssa_itemLoc.modify<DeviceType>();
|
||||
//k_ssa_itemLen.modify<DeviceType>();
|
||||
//k_ssa_gphaseLen.modify<DeviceType>();
|
||||
//k_ssa_gitemLoc.modify<DeviceType>();
|
||||
//k_ssa_gitemLen.modify<DeviceType>();
|
||||
|
||||
list->inum = data.neigh_list.inum; //FIXME once the above is in a parallel_for
|
||||
list->gnum = data.neigh_list.gnum; // it will need a deep_copy or something
|
||||
|
||||
@ -426,7 +426,7 @@ KOKKOS_INLINE_FUNCTION
|
||||
void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
|
||||
|
||||
{
|
||||
const bool one_type = (atom->ntypes == 1);
|
||||
const bool one_type = (ntypes == 1);
|
||||
if (isite1 == isite2)
|
||||
if (one_type)
|
||||
this->vectorized_operator<NEIGHFLAG,NEWTON_PAIR,EVFLAG,true, true, true>(ii, ev);
|
||||
@ -797,7 +797,7 @@ KOKKOS_INLINE_FUNCTION
|
||||
void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxComputeNoAtomics<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
|
||||
|
||||
{
|
||||
const bool one_type = (atom->ntypes == 1);
|
||||
const bool one_type = (ntypes == 1);
|
||||
if (isite1 == isite2)
|
||||
if (one_type)
|
||||
this->vectorized_operator<NEIGHFLAG,NEWTON_PAIR,EVFLAG,true, false, true>(ii, ev);
|
||||
@ -1653,18 +1653,18 @@ template<class DeviceType>
|
||||
void PairExp6rxKokkos<DeviceType>::allocate()
|
||||
{
|
||||
allocated = 1;
|
||||
int n = atom->ntypes;
|
||||
ntypes = atom->ntypes;
|
||||
|
||||
memory->create(setflag,n+1,n+1,"pair:setflag");
|
||||
for (int i = 1; i <= n; i++)
|
||||
for (int j = i; j <= n; j++)
|
||||
memory->create(setflag,ntypes+1,ntypes+1,"pair:setflag");
|
||||
for (int i = 1; i <= ntypes; i++)
|
||||
for (int j = i; j <= ntypes; j++)
|
||||
setflag[i][j] = 0;
|
||||
|
||||
memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
|
||||
memory->create_kokkos(k_cutsq,cutsq,ntypes+1,ntypes+1,"pair:cutsq");
|
||||
d_cutsq = k_cutsq.template view<DeviceType>();
|
||||
k_cutsq.template modify<LMPHostType>();
|
||||
|
||||
memory->create(cut,n+1,n+1,"pair:cut_lj");
|
||||
memory->create(cut,ntypes+1,ntypes+1,"pair:cut_lj");
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -145,7 +145,7 @@ class PairExp6rxKokkos : public PairExp6rx {
|
||||
int eflag,vflag;
|
||||
int nlocal,newton_pair,neighflag;
|
||||
double special_lj[4];
|
||||
int num_threads;
|
||||
int num_threads,ntypes;
|
||||
|
||||
typename AT::t_x_array_randomread x;
|
||||
typename AT::t_f_array f;
|
||||
|
||||
Reference in New Issue
Block a user