Switched to operator()'s and Tag's for the Kokkos launch objects.
- Switched from using lambda functions to operator()'s with type tags in FixRxKokkos. The lambda's were giving big problems in Cuda with the memory objects. This required that all referenced views be members of the FixRXKokkos class. - Add copymode controls to solve_reactions() to avoid the destructor freeing pointers carried forward from the copy constructor. Added the same to FixRX since its called, too.
This commit is contained in:
@ -69,13 +69,16 @@ FixRxKokkos<DeviceType>::FixRxKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
datamask_read = EMPTY_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
|
||||
k_error_flag = DAT::tdual_int_scalar("FixRxKokkos::k_error_flag");
|
||||
|
||||
printf("Inside FixRxKokkos::FixRxKokkos\n");
|
||||
}
|
||||
|
||||
template <typename DeviceType>
|
||||
FixRxKokkos<DeviceType>::~FixRxKokkos()
|
||||
{
|
||||
printf("Inside FixRxKokkos::~FixRxKokkos\n");
|
||||
printf("Inside FixRxKokkos::~FixRxKokkos copymode= %d\n", copymode);
|
||||
if (copymode) return;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -1315,6 +1318,95 @@ void FixRxKokkos<DeviceType>::pre_force(int vflag)
|
||||
|
||||
this->solve_reactions( vflag, true );
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <typename DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixRxKokkos<DeviceType>::operator()(Tag_FixRxKokkos_zeroCounterViews, const int& i) const
|
||||
{
|
||||
d_diagnosticCounterPerODEnSteps(i) = 0;
|
||||
d_diagnosticCounterPerODEnFuncs(i) = 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <typename DeviceType>
|
||||
template <bool ZERO_RATES>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixRxKokkos<DeviceType>::operator()(Tag_FixRxKokkos_solveSystems<ZERO_RATES>, const int& i, CounterType& counter) const
|
||||
{
|
||||
if (d_mask(i) & groupbit)
|
||||
{
|
||||
StridedArrayType<double,1> y( d_scratchSpace.ptr_on_device() + scratchSpaceSize * i );
|
||||
StridedArrayType<double,1> rwork( &y[nspecies] );
|
||||
|
||||
UserRHSDataKokkos<1> userData;
|
||||
userData.kFor.m_data = &( rwork[7*nspecies] );
|
||||
userData.rxnRateLaw.m_data = &( userData.kFor[ nreactions ] );
|
||||
|
||||
CounterType counter_i;
|
||||
|
||||
const double theta = (localTempFlag) ? d_dpdThetaLocal(i) : d_dpdTheta(i);
|
||||
|
||||
//Compute the reaction rate constants
|
||||
for (int irxn = 0; irxn < nreactions; irxn++)
|
||||
{
|
||||
if (ZERO_RATES)
|
||||
userData.kFor[irxn] = 0.0;
|
||||
else
|
||||
{
|
||||
userData.kFor[irxn] = d_kineticsData.Arr(irxn) *
|
||||
pow(theta, d_kineticsData.nArr(irxn)) *
|
||||
exp(-d_kineticsData.Ea(irxn) / boltz / theta);
|
||||
}
|
||||
}
|
||||
|
||||
// Update ConcOld and initialize the ODE solution vector y[].
|
||||
for (int ispecies = 0; ispecies < nspecies; ispecies++)
|
||||
{
|
||||
const double tmp = d_dvector(ispecies, i);
|
||||
d_dvector(ispecies+nspecies, i) = tmp;
|
||||
y[ispecies] = tmp;
|
||||
}
|
||||
|
||||
// Solver the ODE system.
|
||||
if (odeIntegrationFlag == ODE_LAMMPS_RK4)
|
||||
{
|
||||
k_rk4(t_stop, y, rwork, userData);
|
||||
}
|
||||
else if (odeIntegrationFlag == ODE_LAMMPS_RKF45)
|
||||
{
|
||||
k_rkf45(nspecies, t_stop, y, rwork, userData, counter_i);
|
||||
|
||||
if (diagnosticFrequency == 1)
|
||||
{
|
||||
d_diagnosticCounterPerODEnSteps(i) = counter_i.nSteps;
|
||||
d_diagnosticCounterPerODEnFuncs(i) = counter_i.nFuncs;
|
||||
}
|
||||
}
|
||||
|
||||
// Store the solution back in dvector.
|
||||
for (int ispecies = 0; ispecies < nspecies; ispecies++)
|
||||
{
|
||||
if (y[ispecies] < -MY_EPSILON)
|
||||
{
|
||||
//error->one(FLERR,"Computed concentration in RK solver is < -10*DBL_EPSILON");
|
||||
k_error_flag.d_view() = 2;
|
||||
// This should be an atomic update.
|
||||
}
|
||||
else if (y[ispecies] < MY_EPSILON)
|
||||
y[ispecies] = 0.0;
|
||||
|
||||
d_dvector(ispecies,i) = y[ispecies];
|
||||
}
|
||||
|
||||
// Update the iteration statistics counter. Is this unique for each iteration?
|
||||
counter += counter_i;
|
||||
|
||||
} // if
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <typename DeviceType>
|
||||
@ -1322,12 +1414,15 @@ void FixRxKokkos<DeviceType>::solve_reactions(const int vflag, const bool isPreF
|
||||
{
|
||||
printf("Inside FixRxKokkos<DeviceType>::solve_reactions localTempFlag= %d isPreForce= %s\n", localTempFlag, isPreForce ? "True" : "false");
|
||||
|
||||
copymode = 1;
|
||||
|
||||
if (update_kinetics_data)
|
||||
create_kinetics_data();
|
||||
|
||||
TimerType timer_start = getTimeStamp();
|
||||
|
||||
const int nlocal = atom->nlocal;
|
||||
//const int nlocal = atom->nlocal;
|
||||
this->nlocal = atom->nlocal;
|
||||
const int nghost = atom->nghost;
|
||||
const int newton_pair = force->newton_pair;
|
||||
|
||||
@ -1339,8 +1434,8 @@ void FixRxKokkos<DeviceType>::solve_reactions(const int vflag, const bool isPreF
|
||||
const int count = nlocal + (newton_pair ? nghost : 0);
|
||||
|
||||
memory->create_kokkos (k_dpdThetaLocal, dpdThetaLocal, count, "FixRxKokkos::dpdThetaLocal");
|
||||
d_dpdThetaLocal = k_dpdThetaLocal.d_view;
|
||||
h_dpdThetaLocal = k_dpdThetaLocal.h_view;
|
||||
this->d_dpdThetaLocal = k_dpdThetaLocal.d_view;
|
||||
this->h_dpdThetaLocal = k_dpdThetaLocal.h_view;
|
||||
|
||||
const int neighflag = lmp->kokkos->neighflag;
|
||||
|
||||
@ -1376,16 +1471,21 @@ void FixRxKokkos<DeviceType>::solve_reactions(const int vflag, const bool isPreF
|
||||
// ...
|
||||
|
||||
// Local references to the atomKK objects.
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d d_dpdTheta = atomKK->k_dpdTheta.view<DeviceType>();
|
||||
typename ArrayTypes<DeviceType>::t_float_2d d_dvector = atomKK->k_dvector.view<DeviceType>();
|
||||
typename ArrayTypes<DeviceType>::t_int_1d d_mask = atomKK->k_mask.view<DeviceType>();
|
||||
//typename ArrayTypes<DeviceType>::t_efloat_1d d_dpdTheta = atomKK->k_dpdTheta.view<DeviceType>();
|
||||
//typename ArrayTypes<DeviceType>::t_float_2d d_dvector = atomKK->k_dvector.view<DeviceType>();
|
||||
//typename ArrayTypes<DeviceType>::t_int_1d d_mask = atomKK->k_mask.view<DeviceType>();
|
||||
this->d_dpdTheta = atomKK->k_dpdTheta.view<DeviceType>();
|
||||
this->d_dvector = atomKK->k_dvector.view<DeviceType>();
|
||||
this->d_mask = atomKK->k_mask.view<DeviceType>();
|
||||
|
||||
// Get up-to-date data.
|
||||
atomKK->sync( execution_space, MASK_MASK | DVECTOR_MASK | DPDTHETA_MASK );
|
||||
|
||||
// Set some constants outside of the parallel_for
|
||||
const double boltz = force->boltz;
|
||||
const double t_stop = update->dt; // DPD time-step and integration length.
|
||||
//const double boltz = force->boltz;
|
||||
//const double t_stop = update->dt; // DPD time-step and integration length.
|
||||
this->boltz = force->boltz;
|
||||
this->t_stop = update->dt; // DPD time-step and integration length.
|
||||
|
||||
// Average DPD volume. Used in the RHS function.
|
||||
this->VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms;
|
||||
@ -1398,17 +1498,18 @@ void FixRxKokkos<DeviceType>::solve_reactions(const int vflag, const bool isPreF
|
||||
d_diagnosticCounterPerODEnSteps = k_diagnosticCounterPerODEnSteps.d_view;
|
||||
d_diagnosticCounterPerODEnFuncs = k_diagnosticCounterPerODEnFuncs.d_view;
|
||||
|
||||
Kokkos::parallel_for ( nlocal,
|
||||
LAMMPS_LAMBDA(const int i)
|
||||
{
|
||||
d_diagnosticCounterPerODEnSteps(i) = 0;
|
||||
d_diagnosticCounterPerODEnFuncs(i) = 0;
|
||||
}
|
||||
);
|
||||
Kokkos::parallel_for ( Kokkos::RangePolicy<DeviceType, Tag_FixRxKokkos_zeroCounterViews>(0,nlocal), *this);
|
||||
//Kokkos::parallel_for ( nlocal,
|
||||
// LAMMPS_LAMBDA(const int i)
|
||||
// {
|
||||
// d_diagnosticCounterPerODEnSteps(i) = 0;
|
||||
// d_diagnosticCounterPerODEnFuncs(i) = 0;
|
||||
// }
|
||||
// );
|
||||
}
|
||||
|
||||
// Error flag for any failures.
|
||||
DAT::tdual_int_scalar k_error_flag("pair:error_flag");
|
||||
//DAT::tdual_int_scalar k_error_flag("pair:error_flag");
|
||||
|
||||
// Initialize and sync the device flag.
|
||||
k_error_flag.h_view() = 0;
|
||||
@ -1416,11 +1517,14 @@ void FixRxKokkos<DeviceType>::solve_reactions(const int vflag, const bool isPreF
|
||||
k_error_flag.template sync<DeviceType>();
|
||||
|
||||
// Create scratch array space.
|
||||
const size_t scratchSpaceSize = (8*nspecies + 2*nreactions);
|
||||
//const size_t scratchSpaceSize = (8*nspecies + 2*nreactions);
|
||||
this->scratchSpaceSize = (8*nspecies + 2*nreactions);
|
||||
//double *scratchSpace = new double[ scratchSpaceSize * nlocal ];
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_double_1d d_scratchSpace("d_scratchSpace", scratchSpaceSize * nlocal);
|
||||
//typename ArrayTypes<DeviceType>::t_double_1d d_scratchSpace("d_scratchSpace", scratchSpaceSize * nlocal);
|
||||
memory->create_kokkos (d_scratchSpace, nlocal*scratchSpaceSize, "FixRxKokkos::d_scratchSpace");
|
||||
|
||||
#if 0
|
||||
Kokkos::parallel_reduce( nlocal, LAMMPS_LAMBDA(int i, CounterType &counter)
|
||||
{
|
||||
if (d_mask(i) & groupbit)
|
||||
@ -1514,8 +1618,15 @@ void FixRxKokkos<DeviceType>::solve_reactions(const int vflag, const bool isPreF
|
||||
|
||||
, TotalCounters // reduction value for all iterations.
|
||||
);
|
||||
#else
|
||||
if (setRatesToZero)
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy<DeviceType, Tag_FixRxKokkos_solveSystems<true > >(0,nlocal), *this, TotalCounters);
|
||||
else
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy<DeviceType, Tag_FixRxKokkos_solveSystems<false> >(0,nlocal), *this, TotalCounters);
|
||||
#endif
|
||||
|
||||
//delete [] scratchSpace;
|
||||
memory->destroy_kokkos (d_scratchSpace);
|
||||
|
||||
TimerType timer_ODE = getTimeStamp();
|
||||
|
||||
@ -1570,6 +1681,8 @@ void FixRxKokkos<DeviceType>::solve_reactions(const int vflag, const bool isPreF
|
||||
(diagnosticFrequency < 0 && update->ntimestep == update->laststep) )
|
||||
this->odeDiagnostics();
|
||||
}
|
||||
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -1654,7 +1767,8 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
|
||||
|
||||
double my_max[numCounters], my_min[numCounters];
|
||||
|
||||
const int nlocal = atom->nlocal;
|
||||
//const int nlocal = atom->nlocal;
|
||||
nlocal = atom->nlocal;
|
||||
HAT::t_int_1d h_mask = atomKK->k_mask.h_view;
|
||||
|
||||
for (int i = 0; i < numCounters; ++i)
|
||||
@ -1760,17 +1874,122 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <typename DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixRxKokkos<DeviceType>::operator()(Tag_FixRxKokkos_zeroTemperatureViews, const int& i) const
|
||||
{
|
||||
d_sumWeights(i) = 0.0;
|
||||
d_dpdThetaLocal(i) = 0.0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <typename DeviceType>
|
||||
template <int WT_FLAG, bool NEWTON_PAIR, int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixRxKokkos<DeviceType>::operator()(Tag_FixRxKokkos_firstPairOperator<WT_FLAG,NEWTON_PAIR,NEIGHFLAG>, const int& ii) const
|
||||
{
|
||||
// Create an atomic view of sumWeights and dpdThetaLocal. Only needed
|
||||
// for Half/thread scenarios.
|
||||
typedef Kokkos::View< E_FLOAT*, typename DAT::t_efloat_1d::array_layout, typename DAT::t_efloat_1d::device_type, Kokkos::MemoryTraits< AtomicF< NEIGHFLAG >::value> > AtomicViewType;
|
||||
|
||||
AtomicViewType a_dpdThetaLocal = d_dpdThetaLocal;
|
||||
AtomicViewType a_sumWeights = d_sumWeights;
|
||||
|
||||
// Local scalar accumulators.
|
||||
double i_dpdThetaLocal = 0.0;
|
||||
double i_sumWeights = 0.0;
|
||||
|
||||
const int i = d_ilist(ii);
|
||||
|
||||
const double xtmp = d_x(i,0);
|
||||
const double ytmp = d_x(i,1);
|
||||
const double ztmp = d_x(i,2);
|
||||
const int itype = d_type(i);
|
||||
|
||||
const int jnum = d_numneigh(i);
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++)
|
||||
{
|
||||
const int j = (d_neighbors(i,jj) & NEIGHMASK);
|
||||
const int jtype = d_type(j);
|
||||
|
||||
const double delx = xtmp - d_x(j,0);
|
||||
const double dely = ytmp - d_x(j,1);
|
||||
const double delz = ztmp - d_x(j,2);
|
||||
const double rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
const double cutsq_ij = d_cutsq(itype,jtype);
|
||||
|
||||
if (rsq < cutsq_ij)
|
||||
{
|
||||
const double rcut = sqrt( cutsq_ij );
|
||||
double rij = sqrt(rsq);
|
||||
double ratio = rij/rcut;
|
||||
|
||||
double wij = 0.0;
|
||||
|
||||
// Lucy's Weight Function
|
||||
if (WT_FLAG == LUCY)
|
||||
{
|
||||
wij = (1.0+3.0*ratio) * (1.0-ratio)*(1.0-ratio)*(1.0-ratio);
|
||||
i_dpdThetaLocal += wij / d_dpdTheta(j);
|
||||
if (NEWTON_PAIR || j < nlocal)
|
||||
a_dpdThetaLocal(j) += wij / d_dpdTheta(i);
|
||||
}
|
||||
|
||||
i_sumWeights += wij;
|
||||
if (NEWTON_PAIR || j < nlocal)
|
||||
a_sumWeights(j) += wij;
|
||||
}
|
||||
}
|
||||
|
||||
// Update, don't assign, the array value (because another iteration may have hit it).
|
||||
a_dpdThetaLocal(i) += i_dpdThetaLocal;
|
||||
a_sumWeights(i) += i_sumWeights;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <typename DeviceType>
|
||||
template <int WT_FLAG, int LOCAL_TEMP_FLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixRxKokkos<DeviceType>::operator()(Tag_FixRxKokkos_2ndPairOperator<WT_FLAG,LOCAL_TEMP_FLAG>, const int& i) const
|
||||
{
|
||||
double wij = 0.0;
|
||||
|
||||
// Lucy Weight Function
|
||||
if (WT_FLAG == LUCY)
|
||||
{
|
||||
wij = 1.0;
|
||||
d_dpdThetaLocal(i) += wij / d_dpdTheta(i);
|
||||
}
|
||||
d_sumWeights(i) += wij;
|
||||
|
||||
// Normalized local temperature
|
||||
d_dpdThetaLocal(i) = d_dpdThetaLocal(i) / d_sumWeights(i);
|
||||
|
||||
if (LOCAL_TEMP_FLAG == HARMONIC)
|
||||
d_dpdThetaLocal(i) = 1.0 / d_dpdThetaLocal(i);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <typename DeviceType>
|
||||
template <int WT_FLAG, int LOCAL_TEMP_FLAG, bool NEWTON_PAIR, int NEIGHFLAG>
|
||||
void FixRxKokkos<DeviceType>::computeLocalTemperature()
|
||||
{
|
||||
typename ArrayTypes<DeviceType>::t_x_array_randomread d_x = atomKK->k_x.view<DeviceType>();
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_randomread d_type = atomKK->k_type.view<DeviceType>();
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d d_dpdTheta = atomKK->k_dpdTheta.view<DeviceType>();
|
||||
//typename ArrayTypes<DeviceType>::t_x_array_randomread d_x = atomKK->k_x.view<DeviceType>();
|
||||
//typename ArrayTypes<DeviceType>::t_int_1d_randomread d_type = atomKK->k_type.view<DeviceType>();
|
||||
//typename ArrayTypes<DeviceType>::t_efloat_1d d_dpdTheta = atomKK->k_dpdTheta.view<DeviceType>();
|
||||
d_x = atomKK->k_x.view<DeviceType>();
|
||||
d_type = atomKK->k_type.view<DeviceType>();
|
||||
d_dpdTheta = atomKK->k_dpdTheta.view<DeviceType>();
|
||||
|
||||
atomKK->sync(execution_space, X_MASK | TYPE_MASK | DPDTHETA_MASK );
|
||||
|
||||
const int nlocal = atom->nlocal;
|
||||
//const int nlocal = atom->nlocal;
|
||||
nlocal = atom->nlocal;
|
||||
const int nghost = atom->nghost;
|
||||
|
||||
printf("Inside FixRxKokkos::computeLocalTemperature: %d %d %d %d %d %d %d\n", WT_FLAG, LOCAL_TEMP_FLAG, NEWTON_PAIR, (int)lmp->kokkos->neighflag, NEIGHFLAG, nlocal, nghost);
|
||||
@ -1780,14 +1999,15 @@ void FixRxKokkos<DeviceType>::computeLocalTemperature()
|
||||
//typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq = pairDPDEKK->k_cutsq.template view<DeviceType();
|
||||
|
||||
//!< Copies pulled from pairDPDE for local use since pairDPDEKK's objects are protected.
|
||||
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
|
||||
double **h_cutsq;
|
||||
//typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
|
||||
//typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
|
||||
//double **h_cutsq;
|
||||
|
||||
{
|
||||
const int ntypes = atom->ntypes;
|
||||
|
||||
memory->create_kokkos (k_cutsq, h_cutsq, ntypes+1, ntypes+1, "pair:cutsq");
|
||||
//memory->create_kokkos (k_cutsq, h_cutsq, ntypes+1, ntypes+1, "pair:cutsq");
|
||||
memory->create_kokkos (k_cutsq, ntypes+1, ntypes+1, "FixRxKokkos::k_cutsq");
|
||||
d_cutsq = k_cutsq.template view<DeviceType>();
|
||||
|
||||
for (int i = 1; i <= ntypes; ++i)
|
||||
@ -1804,30 +2024,37 @@ void FixRxKokkos<DeviceType>::computeLocalTemperature()
|
||||
// Initialize the local temperature weight array
|
||||
int sumWeightsCt = nlocal + (NEWTON_PAIR ? nghost : 0);
|
||||
|
||||
memory->create_kokkos (k_sumWeights, sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights");
|
||||
//memory->create_kokkos (k_sumWeights, sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights");
|
||||
memory->create_kokkos (k_sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights");
|
||||
d_sumWeights = k_sumWeights.d_view;
|
||||
h_sumWeights = k_sumWeights.h_view;
|
||||
|
||||
// Initialize the accumulator to zero ...
|
||||
Kokkos::parallel_for (sumWeightsCt,
|
||||
LAMMPS_LAMBDA(const int i)
|
||||
{
|
||||
d_sumWeights(i) = 0.0;
|
||||
}
|
||||
);
|
||||
//Kokkos::parallel_for (sumWeightsCt,
|
||||
// LAMMPS_LAMBDA(const int i)
|
||||
// {
|
||||
// d_sumWeights(i) = 0.0;
|
||||
// }
|
||||
// );
|
||||
|
||||
Kokkos::parallel_for (Kokkos::RangePolicy<DeviceType, Tag_FixRxKokkos_zeroTemperatureViews>(0, sumWeightsCt), *this);
|
||||
|
||||
// Local list views. (This isn't working!)
|
||||
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
|
||||
if (not(list->kokkos))
|
||||
error->one(FLERR,"list is not a Kokkos list\n");
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors = k_list->d_neighbors;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d d_ilist = k_list->d_ilist;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d d_numneigh = k_list->d_numneigh;
|
||||
//typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors = k_list->d_neighbors;
|
||||
//typename ArrayTypes<DeviceType>::t_int_1d d_ilist = k_list->d_ilist;
|
||||
//typename ArrayTypes<DeviceType>::t_int_1d d_numneigh = k_list->d_numneigh;
|
||||
d_neighbors = k_list->d_neighbors;
|
||||
d_ilist = k_list->d_ilist;
|
||||
d_numneigh = k_list->d_numneigh;
|
||||
|
||||
const int inum = list->inum;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
#if 0
|
||||
Kokkos::parallel_for ( inum,
|
||||
LAMMPS_LAMBDA(const int ii)
|
||||
{
|
||||
@ -1892,6 +2119,9 @@ void FixRxKokkos<DeviceType>::computeLocalTemperature()
|
||||
a_sumWeights(i) += i_sumWeights;
|
||||
}
|
||||
);
|
||||
#else
|
||||
Kokkos::parallel_for (Kokkos::RangePolicy<DeviceType, Tag_FixRxKokkos_firstPairOperator<WT_FLAG, NEWTON_PAIR, NEIGHFLAG> >(0, inum), *this);
|
||||
#endif
|
||||
|
||||
// Signal that dpdThetaLocal and sumWeights have been modified.
|
||||
k_dpdThetaLocal.template modify<DeviceType>();
|
||||
@ -1905,6 +2135,7 @@ void FixRxKokkos<DeviceType>::computeLocalTemperature()
|
||||
k_sumWeights. template sync<DeviceType>();
|
||||
|
||||
// self-interaction for local temperature
|
||||
#if 0
|
||||
Kokkos::parallel_for ( nlocal,
|
||||
LAMMPS_LAMBDA(const int i)
|
||||
{
|
||||
@ -1925,10 +2156,15 @@ void FixRxKokkos<DeviceType>::computeLocalTemperature()
|
||||
d_dpdThetaLocal(i) = 1.0 / d_dpdThetaLocal(i);
|
||||
}
|
||||
);
|
||||
#else
|
||||
Kokkos::parallel_for (Kokkos::RangePolicy<DeviceType, Tag_FixRxKokkos_2ndPairOperator<WT_FLAG, LOCAL_TEMP_FLAG> >(0, nlocal), *this);
|
||||
#endif
|
||||
|
||||
// Clean up the local kokkos data.
|
||||
memory->destroy_kokkos(k_cutsq, h_cutsq);
|
||||
memory->destroy_kokkos(k_sumWeights, sumWeights);
|
||||
//memory->destroy_kokkos(k_cutsq, h_cutsq);
|
||||
memory->destroy_kokkos(k_cutsq);
|
||||
//memory->destroy_kokkos(k_sumWeights, sumWeights);
|
||||
memory->destroy_kokkos(k_sumWeights);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -30,6 +30,47 @@ FixStyle(rx/kk/host,FixRxKokkos<LMPHostType>)
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
struct Tag_FixRxKokkos_zeroTemperatureViews {};
|
||||
struct Tag_FixRxKokkos_zeroCounterViews {};
|
||||
|
||||
template <int WT_FLAG, bool NEWTON_PAIR, int NEIGHFLAG>
|
||||
struct Tag_FixRxKokkos_firstPairOperator {};
|
||||
|
||||
template <int WT_FLAG, int LOCAL_TEMP_FLAG>
|
||||
struct Tag_FixRxKokkos_2ndPairOperator {};
|
||||
|
||||
template <bool ZERO_RATES>
|
||||
struct Tag_FixRxKokkos_solveSystems {};
|
||||
|
||||
struct s_CounterType
|
||||
{
|
||||
int nSteps, nIters, nFuncs, nFails;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
s_CounterType() : nSteps(0), nIters(0), nFuncs(0), nFails(0) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
s_CounterType& operator+=(const s_CounterType &rhs)
|
||||
{
|
||||
nSteps += rhs.nSteps;
|
||||
nIters += rhs.nIters;
|
||||
nFuncs += rhs.nFuncs;
|
||||
nFails += rhs.nFails;
|
||||
return *this;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
volatile s_CounterType& operator+=(const volatile s_CounterType &rhs) volatile
|
||||
{
|
||||
nSteps += rhs.nSteps;
|
||||
nIters += rhs.nIters;
|
||||
nFuncs += rhs.nFuncs;
|
||||
nFails += rhs.nFails;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
typedef struct s_CounterType CounterType;
|
||||
|
||||
template <typename DeviceType>
|
||||
class FixRxKokkos : public FixRX {
|
||||
public:
|
||||
@ -41,42 +82,34 @@ class FixRxKokkos : public FixRX {
|
||||
virtual void setup_pre_force(int);
|
||||
virtual void pre_force(int);
|
||||
|
||||
//template <typename SolverTag>
|
||||
// KOKKOS_INLINE_FUNCTION
|
||||
//void operator()(SolverTag, const int&) const;
|
||||
// Define a value_type here for the reduction operator on CounterType.
|
||||
typedef CounterType value_type;
|
||||
|
||||
struct CounterType
|
||||
{
|
||||
int nSteps, nIters, nFuncs, nFails;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(Tag_FixRxKokkos_zeroCounterViews, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
CounterType() : nSteps(0), nIters(0), nFuncs(0), nFails(0) {};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(Tag_FixRxKokkos_zeroTemperatureViews, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
CounterType& operator+=(const CounterType &rhs)
|
||||
{
|
||||
nSteps += rhs.nSteps;
|
||||
nIters += rhs.nIters;
|
||||
nFuncs += rhs.nFuncs;
|
||||
nFails += rhs.nFails;
|
||||
return *this;
|
||||
}
|
||||
template <int WT_FLAG, bool NEWTON_PAIR, int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(Tag_FixRxKokkos_firstPairOperator<WT_FLAG,NEWTON_PAIR,NEIGHFLAG>, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
volatile CounterType& operator+=(const volatile CounterType &rhs) volatile
|
||||
{
|
||||
nSteps += rhs.nSteps;
|
||||
nIters += rhs.nIters;
|
||||
nFuncs += rhs.nFuncs;
|
||||
nFails += rhs.nFails;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
template <int WT_FLAG, int LOCAL_TEMP_FLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(Tag_FixRxKokkos_2ndPairOperator<WT_FLAG,LOCAL_TEMP_FLAG>, const int&) const;
|
||||
|
||||
template <bool ZERO_RATES>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(Tag_FixRxKokkos_solveSystems<ZERO_RATES>, const int&, CounterType&) const;
|
||||
|
||||
//protected:
|
||||
PairDPDfdtEnergyKokkos<DeviceType>* pairDPDEKK;
|
||||
double VDPD;
|
||||
|
||||
double boltz;
|
||||
double t_stop;
|
||||
|
||||
template <typename T, int stride = 1>
|
||||
struct StridedArrayType
|
||||
{
|
||||
@ -203,6 +236,27 @@ class FixRxKokkos : public FixRX {
|
||||
typename DAT::t_efloat_1d d_dpdThetaLocal, d_sumWeights;
|
||||
typename HAT::t_efloat_1d h_dpdThetaLocal, h_sumWeights;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_x_array_randomread d_x ;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_randomread d_type ;
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d d_dpdTheta;
|
||||
|
||||
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
|
||||
//double **h_cutsq;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d d_ilist ;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d d_numneigh ;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_float_2d d_dvector;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d d_mask ;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_double_1d d_scratchSpace;
|
||||
size_t scratchSpaceSize;
|
||||
|
||||
// Error flag for any failures.
|
||||
DAT::tdual_int_scalar k_error_flag;
|
||||
|
||||
template <int WT_FLAG, int LOCAL_TEMP_FLAG, bool NEWTON_PAIR, int NEIGHFLAG>
|
||||
void computeLocalTemperature();
|
||||
|
||||
@ -213,6 +267,7 @@ class FixRxKokkos : public FixRX {
|
||||
|
||||
//private: // replicate a few from FixRX
|
||||
int my_restartFlag;
|
||||
int nlocal;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@ -220,6 +220,9 @@ FixRX::FixRX(LAMMPS *lmp, int narg, char **arg) :
|
||||
|
||||
FixRX::~FixRX()
|
||||
{
|
||||
printf("Inside FixRX::~FixRX copymode= %d\n", copymode);
|
||||
if (copymode) return;
|
||||
|
||||
// De-Allocate memory to prevent memory leak
|
||||
for (int ii = 0; ii < nreactions; ii++){
|
||||
delete [] stoich[ii];
|
||||
|
||||
Reference in New Issue
Block a user