Merge pull request #2900 from stanmoore1/kk_bugfixes
Fix issues in KOKKOS package
This commit is contained in:
@ -191,22 +191,19 @@ pair_coeff command, to avoid ambiguity in the number of coefficients.
|
||||
|
||||
The keywords *chunksize* and *parallelthresh* are only applicable when
|
||||
using the pair style *snap* with the KOKKOS package on GPUs and are
|
||||
ignored otherwise.
|
||||
The *chunksize* keyword controls
|
||||
the number of atoms in each pass used to compute the bispectrum
|
||||
components and is used to avoid running out of memory. For example,
|
||||
if there are 8192 atoms in the simulation and the *chunksize*
|
||||
is set to 4096, the bispectrum calculation will be broken up
|
||||
into two passes (running on a single GPU).
|
||||
The *parallelthresh* keyword controls
|
||||
a crossover threshold for performing extra parallelism. For
|
||||
small systems, exposing additional parallelism can be beneficial when
|
||||
there is not enough work to fully saturate the GPU threads otherwise.
|
||||
However, the extra parallelism also leads to more divergence
|
||||
and can hurt performance when the system is already large enough to
|
||||
saturate the GPU threads. Extra parallelism will be performed if the
|
||||
*chunksize* (or total number of atoms per GPU) is smaller than
|
||||
*parallelthresh*.
|
||||
ignored otherwise. The *chunksize* keyword controls the number of atoms
|
||||
in each pass used to compute the bispectrum components and is used to
|
||||
avoid running out of memory. For example, if there are 8192 atoms in the
|
||||
simulation and the *chunksize* is set to 4096, the bispectrum
|
||||
calculation will be broken up into two passes (running on a single GPU).
|
||||
The *parallelthresh* keyword controls a crossover threshold for
|
||||
performing extra parallelism. For small systems, exposing additional
|
||||
parallelism can be beneficial when there is not enough work to fully
|
||||
saturate the GPU threads otherwise. However, the extra parallelism also
|
||||
leads to more divergence and can hurt performance when the system is
|
||||
already large enough to saturate the GPU threads. Extra parallelism
|
||||
will be performed if the *chunksize* (or total number of atoms per GPU)
|
||||
is smaller than *parallelthresh*.
|
||||
|
||||
Detailed definitions for all the other keywords
|
||||
are given on the :doc:`compute sna/atom <compute_sna_atom>` doc page.
|
||||
|
||||
@ -406,8 +406,8 @@ KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POW
|
||||
KOKKOS_INTERNAL_USE_TM := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_SKX))
|
||||
|
||||
# Incompatible flags?
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc )
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1") | bc)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
|
||||
$(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
|
||||
|
||||
@ -30,7 +30,7 @@ class FixFreeze : public Fix {
|
||||
int setmask();
|
||||
void init();
|
||||
void setup(int);
|
||||
void post_force(int);
|
||||
virtual void post_force(int);
|
||||
void post_force_respa(int, int, int);
|
||||
double compute_vector(int);
|
||||
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
// clang-format off
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
@ -20,6 +19,7 @@ ComputeStyle(temp/deform/kk/host,ComputeTempDeformKokkos<LMPHostType>);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
// clang-format off
|
||||
#ifndef LMP_COMPUTE_TEMP_DEFORM_KOKKOS_H
|
||||
#define LMP_COMPUTE_TEMP_DEFORM_KOKKOS_H
|
||||
|
||||
|
||||
@ -28,41 +28,16 @@ FixFreezeKokkos<DeviceType>::FixFreezeKokkos(LAMMPS *lmp, int narg, char **arg)
|
||||
atomKK = (AtomKokkos *)atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
datamask_read = F_MASK | MASK_MASK;
|
||||
datamask_read = F_MASK | MASK_MASK | TORQUE_MASK;
|
||||
datamask_modify = F_MASK | TORQUE_MASK;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int FixFreezeKokkos<DeviceType>::setmask()
|
||||
{
|
||||
return FixFreeze::setmask();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixFreezeKokkos<DeviceType>::init()
|
||||
{
|
||||
FixFreeze::init();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixFreezeKokkos<DeviceType>::setup(int vflag)
|
||||
{
|
||||
FixFreeze::setup(vflag);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixFreezeKokkos<DeviceType>::post_force(int /*vflag*/)
|
||||
{
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
atomKK->modified(execution_space,datamask_modify);
|
||||
|
||||
f = atomKK->k_f.view<DeviceType>();
|
||||
torque = atomKK->k_torque.view<DeviceType>();
|
||||
@ -80,28 +55,10 @@ void FixFreezeKokkos<DeviceType>::post_force(int /*vflag*/)
|
||||
foriginal[0] = original.values[0];
|
||||
foriginal[1] = original.values[1];
|
||||
foriginal[2] = original.values[2];
|
||||
|
||||
atomKK->modified(execution_space,datamask_modify);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixFreezeKokkos<DeviceType>::post_force_respa(int vflag, int /*ilevel*/, int /*iloop*/)
|
||||
{
|
||||
post_force(vflag);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
return components of total force on fix group before force was changed
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
double FixFreezeKokkos<DeviceType>::compute_vector(int n)
|
||||
{
|
||||
return FixFreeze::compute_vector(n);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixFreezeKokkos<DeviceType>::operator()(const int i, OriginalForce &original) const {
|
||||
|
||||
@ -31,6 +31,7 @@ namespace LAMMPS_NS {
|
||||
template<class DeviceType>
|
||||
class FixFreezeKokkos : public FixFreeze {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
struct OriginalForce {
|
||||
double values[3];
|
||||
|
||||
@ -58,12 +59,7 @@ class FixFreezeKokkos : public FixFreeze {
|
||||
};
|
||||
|
||||
FixFreezeKokkos(class LAMMPS *, int, char **);
|
||||
int setmask();
|
||||
void init();
|
||||
void setup(int);
|
||||
void post_force(int);
|
||||
void post_force_respa(int, int, int);
|
||||
double compute_vector(int);
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i, OriginalForce &original) const;
|
||||
|
||||
@ -87,6 +87,9 @@ void FixNeighHistoryKokkos<DeviceType>::pre_exchange()
|
||||
{
|
||||
copymode = 1;
|
||||
|
||||
k_firstflag.sync<DeviceType>();
|
||||
k_firstvalue.sync<DeviceType>();
|
||||
|
||||
h_resize() = 1;
|
||||
while (h_resize() > 0) {
|
||||
FixNeighHistoryKokkosZeroPartnerCountFunctor<DeviceType> zero(this);
|
||||
@ -168,6 +171,9 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
|
||||
{
|
||||
tag = atomKK->k_tag.view<DeviceType>();
|
||||
|
||||
k_firstflag.sync<DeviceType>();
|
||||
k_firstvalue.sync<DeviceType>();
|
||||
|
||||
int inum = pair->list->inum;
|
||||
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(pair->list);
|
||||
d_numneigh = k_list->d_numneigh;
|
||||
@ -185,8 +191,10 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
|
||||
|
||||
if (maxatom < nlocal || k_list->maxneighs > (int)d_firstflag.extent(1)) {
|
||||
maxatom = nall;
|
||||
d_firstflag = Kokkos::View<int**>("neighbor_history:firstflag",maxatom,k_list->maxneighs);
|
||||
d_firstvalue = Kokkos::View<LMP_FLOAT**>("neighbor_history:firstvalue",maxatom,k_list->maxneighs*dnum);
|
||||
k_firstflag = DAT::tdual_int_2d("neighbor_history:firstflag",maxatom,k_list->maxneighs);
|
||||
k_firstvalue = DAT::tdual_float_2d("neighbor_history:firstvalue",maxatom,k_list->maxneighs*dnum);
|
||||
d_firstflag = k_firstflag.view<DeviceType>();
|
||||
d_firstvalue = k_firstvalue.view<DeviceType>();
|
||||
}
|
||||
|
||||
copymode = 1;
|
||||
@ -194,6 +202,9 @@ void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
|
||||
FixNeighHistoryKokkosPostNeighborFunctor<DeviceType> f(this);
|
||||
Kokkos::parallel_for(inum,f);
|
||||
|
||||
k_firstflag.modify<DeviceType>();
|
||||
k_firstvalue.modify<DeviceType>();
|
||||
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
|
||||
@ -50,10 +50,13 @@ class FixNeighHistoryKokkos : public FixNeighHistory {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void post_neighbor_item(const int &ii) const;
|
||||
|
||||
typename Kokkos::View<int**> d_firstflag;
|
||||
typename Kokkos::View<LMP_FLOAT**> d_firstvalue;
|
||||
typename DAT::tdual_int_2d k_firstflag;
|
||||
typename DAT::tdual_float_2d k_firstvalue;
|
||||
|
||||
private:
|
||||
typename ArrayTypes<DeviceType>::t_int_2d d_firstflag;
|
||||
typename ArrayTypes<DeviceType>::t_float_2d d_firstvalue;
|
||||
|
||||
typename ArrayTypes<DeviceType>::tdual_int_1d k_npartner;
|
||||
typename ArrayTypes<DeviceType>::tdual_tagint_2d k_partner;
|
||||
typename ArrayTypes<DeviceType>::tdual_float_2d k_valuepartner;
|
||||
@ -74,6 +77,7 @@ class FixNeighHistoryKokkos : public FixNeighHistory {
|
||||
|
||||
template <class DeviceType>
|
||||
struct FixNeighHistoryKokkosZeroPartnerCountFunctor {
|
||||
typedef DeviceType device_type;
|
||||
FixNeighHistoryKokkos<DeviceType> c;
|
||||
FixNeighHistoryKokkosZeroPartnerCountFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -84,6 +88,7 @@ struct FixNeighHistoryKokkosZeroPartnerCountFunctor {
|
||||
|
||||
template <class DeviceType>
|
||||
struct FixNeighHistoryKokkosPreExchangeFunctor {
|
||||
typedef DeviceType device_type;
|
||||
FixNeighHistoryKokkos<DeviceType> c;
|
||||
FixNeighHistoryKokkosPreExchangeFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -94,6 +99,7 @@ struct FixNeighHistoryKokkosPreExchangeFunctor {
|
||||
|
||||
template <class DeviceType>
|
||||
struct FixNeighHistoryKokkosPostNeighborFunctor {
|
||||
typedef DeviceType device_type;
|
||||
FixNeighHistoryKokkos<DeviceType> c;
|
||||
FixNeighHistoryKokkosPostNeighborFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
||||
@ -31,8 +31,8 @@ FixNVESphereKokkos<DeviceType>::FixNVESphereKokkos(LAMMPS *lmp, int narg, char *
|
||||
atomKK = (AtomKokkos *)atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
datamask_read = F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK;
|
||||
datamask_modify = X_MASK | V_MASK | OMEGA_MASK;
|
||||
datamask_read = EMPTY_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -61,8 +61,7 @@ void FixNVESphereKokkos<DeviceType>::init()
|
||||
template<class DeviceType>
|
||||
void FixNVESphereKokkos<DeviceType>::initial_integrate(int /*vflag*/)
|
||||
{
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
atomKK->modified(execution_space,datamask_modify);
|
||||
atomKK->sync(execution_space, X_MASK | V_MASK | OMEGA_MASK| F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK);
|
||||
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
v = atomKK->k_v.view<DeviceType>();
|
||||
@ -78,6 +77,8 @@ void FixNVESphereKokkos<DeviceType>::initial_integrate(int /*vflag*/)
|
||||
|
||||
FixNVESphereKokkosInitialIntegrateFunctor<DeviceType> f(this);
|
||||
Kokkos::parallel_for(nlocal,f);
|
||||
|
||||
atomKK->modified(execution_space, X_MASK | V_MASK | OMEGA_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -109,8 +110,7 @@ void FixNVESphereKokkos<DeviceType>::initial_integrate_item(const int i) const
|
||||
template<class DeviceType>
|
||||
void FixNVESphereKokkos<DeviceType>::final_integrate()
|
||||
{
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
atomKK->modified(execution_space,datamask_modify);
|
||||
atomKK->sync(execution_space, V_MASK | OMEGA_MASK| F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK);
|
||||
|
||||
v = atomKK->k_v.view<DeviceType>();
|
||||
omega = atomKK->k_omega.view<DeviceType>();
|
||||
@ -125,6 +125,8 @@ void FixNVESphereKokkos<DeviceType>::final_integrate()
|
||||
|
||||
FixNVESphereKokkosFinalIntegrateFunctor<DeviceType> f(this);
|
||||
Kokkos::parallel_for(nlocal,f);
|
||||
|
||||
atomKK->modified(execution_space, V_MASK | OMEGA_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -56,6 +56,7 @@ class FixNVESphereKokkos : public FixNVESphere {
|
||||
|
||||
template <class DeviceType>
|
||||
struct FixNVESphereKokkosInitialIntegrateFunctor {
|
||||
typedef DeviceType device_type;
|
||||
FixNVESphereKokkos<DeviceType> c;
|
||||
FixNVESphereKokkosInitialIntegrateFunctor(FixNVESphereKokkos<DeviceType> *c_ptr): c(*c_ptr) { c.cleanup_copy(); }
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -66,6 +67,7 @@ struct FixNVESphereKokkosInitialIntegrateFunctor {
|
||||
|
||||
template <class DeviceType>
|
||||
struct FixNVESphereKokkosFinalIntegrateFunctor {
|
||||
typedef DeviceType device_type;
|
||||
FixNVESphereKokkos<DeviceType> c;
|
||||
FixNVESphereKokkosFinalIntegrateFunctor(FixNVESphereKokkos<DeviceType> *c_ptr): c(*c_ptr) { c.cleanup_copy(); }
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
||||
@ -65,8 +65,6 @@ void FixNVTSllodKokkos<DeviceType>::init()
|
||||
{
|
||||
FixNHKokkos<DeviceType>::init();
|
||||
|
||||
vdelu = typename ArrayTypes<DeviceType>::t_v_array("nvt/sllod/kk:vdelu", atomKK->nlocal);
|
||||
|
||||
if (!this->temperature->tempbias)
|
||||
this->error->all(FLERR,"Temperature for fix nvt/sllod does not have a bias");
|
||||
|
||||
@ -100,7 +98,7 @@ void FixNVTSllodKokkos<DeviceType>::nh_v_temp()
|
||||
// calculate temperature since some computes require temp
|
||||
// computed on current nlocal atoms to remove bias
|
||||
|
||||
if (nondeformbias){
|
||||
if (nondeformbias) {
|
||||
atomKK->sync(this->temperature->execution_space,this->temperature->datamask_read);
|
||||
this->temperature->compute_scalar();
|
||||
atomKK->modified(this->temperature->execution_space,this->temperature->datamask_modify);
|
||||
@ -115,6 +113,9 @@ void FixNVTSllodKokkos<DeviceType>::nh_v_temp()
|
||||
|
||||
d_h_two = Few<double, 6>(h_two);
|
||||
|
||||
if (vdelu.extent(0) < atomKK->nmax)
|
||||
vdelu = typename AT::t_v_array(Kokkos::NoInit("nvt/sllod/kk:vdelu"), atomKK->nmax);
|
||||
|
||||
this->copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNVTSllod_temp1>(0,nlocal),*this);
|
||||
this->copymode = 0;
|
||||
|
||||
@ -35,6 +35,9 @@ struct TagFixNVTSllod_temp2{};
|
||||
template<class DeviceType>
|
||||
class FixNVTSllodKokkos : public FixNHKokkos<DeviceType> {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
FixNVTSllodKokkos(class LAMMPS *, int, char **);
|
||||
~FixNVTSllodKokkos() {}
|
||||
void init();
|
||||
@ -51,14 +54,14 @@ class FixNVTSllodKokkos : public FixNHKokkos<DeviceType> {
|
||||
void nh_v_temp();
|
||||
|
||||
protected:
|
||||
typename ArrayTypes<DeviceType>::t_x_array x;
|
||||
typename ArrayTypes<DeviceType>::t_v_array v;
|
||||
typename ArrayTypes<DeviceType>::t_v_array vdelu;
|
||||
typename ArrayTypes<DeviceType>::t_f_array_const f;
|
||||
typename ArrayTypes<DeviceType>::t_float_1d rmass;
|
||||
typename ArrayTypes<DeviceType>::t_float_1d mass;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d type;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d mask;
|
||||
typename AT::t_x_array x;
|
||||
typename AT::t_v_array v;
|
||||
typename AT::t_v_array vdelu;
|
||||
typename AT::t_f_array_const f;
|
||||
typename AT::t_float_1d rmass;
|
||||
typename AT::t_float_1d mass;
|
||||
typename AT::t_int_1d type;
|
||||
typename AT::t_int_1d mask;
|
||||
|
||||
Few<double, 6> d_h_two;
|
||||
|
||||
|
||||
@ -45,23 +45,23 @@ FixPropertyAtomKokkos::FixPropertyAtomKokkos(LAMMPS *lmp, int narg, char **arg)
|
||||
void FixPropertyAtomKokkos::grow_arrays(int nmax)
|
||||
{
|
||||
for (int m = 0; m < nvalue; m++) {
|
||||
if (style[m] == MOLECULE) {
|
||||
if (styles[m] == MOLECULE) {
|
||||
memory->grow(atom->molecule,nmax,"atom:molecule");
|
||||
size_t nbytes = (nmax-nmax_old) * sizeof(tagint);
|
||||
memset(&atom->molecule[nmax_old],0,nbytes);
|
||||
} else if (style[m] == CHARGE) {
|
||||
} else if (styles[m] == CHARGE) {
|
||||
memory->grow(atom->q,nmax,"atom:q");
|
||||
size_t nbytes = (nmax-nmax_old) * sizeof(double);
|
||||
memset(&atom->q[nmax_old],0,nbytes);
|
||||
} else if (style[m] == RMASS) {
|
||||
} else if (styles[m] == RMASS) {
|
||||
memory->grow(atom->rmass,nmax,"atom:rmass");
|
||||
size_t nbytes = (nmax-nmax_old) * sizeof(double);
|
||||
memset(&atom->rmass[nmax_old],0,nbytes);
|
||||
} else if (style[m] == INTEGER) {
|
||||
} else if (styles[m] == INTEGER) {
|
||||
memory->grow(atom->ivector[index[m]],nmax,"atom:ivector");
|
||||
size_t nbytes = (nmax-nmax_old) * sizeof(int);
|
||||
memset(&atom->ivector[index[m]][nmax_old],0,nbytes);
|
||||
} else if (style[m] == DOUBLE) {
|
||||
} else if (styles[m] == DOUBLE) {
|
||||
atomKK->sync(Device,DVECTOR_MASK);
|
||||
memoryKK->grow_kokkos(atomKK->k_dvector,atomKK->dvector,atomKK->k_dvector.extent(0),nmax,
|
||||
"atom:dvector");
|
||||
|
||||
@ -165,8 +165,11 @@ void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
d_neighbors.extent(1) != d_neighbors_touch.extent(1))
|
||||
d_neighbors_touch = typename AT::t_neighbors_2d("pair:neighbors_touch",d_neighbors.extent(0),d_neighbors.extent(1));
|
||||
|
||||
d_firsttouch = fix_historyKK->d_firstflag;
|
||||
d_firstshear = fix_historyKK->d_firstvalue;
|
||||
fix_historyKK->k_firstflag.template sync<DeviceType>();
|
||||
fix_historyKK->k_firstvalue.template sync<DeviceType>();
|
||||
|
||||
d_firsttouch = fix_historyKK->k_firstflag.template view<DeviceType>();
|
||||
d_firstshear = fix_historyKK->k_firstvalue.template view<DeviceType>();
|
||||
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryReduce>(0,inum),*this);
|
||||
|
||||
@ -258,6 +261,11 @@ void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
}
|
||||
}
|
||||
|
||||
if (eflag_atom) {
|
||||
k_eatom.template modify<DeviceType>();
|
||||
k_eatom.template sync<LMPHostType>();
|
||||
}
|
||||
|
||||
if (vflag_global) {
|
||||
virial[0] += ev.v[0];
|
||||
virial[1] += ev.v[1];
|
||||
|
||||
@ -92,8 +92,8 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory {
|
||||
typename AT::t_int_1d_randomread d_ilist;
|
||||
typename AT::t_int_1d_randomread d_numneigh;
|
||||
|
||||
typename Kokkos::View<int**> d_firsttouch;
|
||||
typename Kokkos::View<LMP_FLOAT**> d_firstshear;
|
||||
typename AT::t_int_2d d_firsttouch;
|
||||
typename AT::t_float_2d d_firstshear;
|
||||
|
||||
typename AT::t_neighbors_2d d_neighbors_touch;
|
||||
typename AT::t_int_1d d_numneigh_touch;
|
||||
|
||||
@ -69,7 +69,7 @@ PairLJCharmmCoulCharmmKokkos<DeviceType>::~PairLJCharmmCoulCharmmKokkos()
|
||||
if (allocated) {
|
||||
memoryKK->destroy_kokkos(k_eatom,eatom);
|
||||
memoryKK->destroy_kokkos(k_vatom,vatom);
|
||||
k_cutsq = DAT::tdual_ffloat_2d();
|
||||
memoryKK->destroy_kokkos(k_cutsq,cutsq);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
|
||||
@ -92,40 +92,22 @@ void VerletKokkos::setup(int flag)
|
||||
// acquire ghosts
|
||||
// build neighbor lists
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
lmp->kokkos->auto_sync = 1;
|
||||
|
||||
atomKK->setup();
|
||||
atom->setup();
|
||||
modify->setup_pre_exchange();
|
||||
// debug
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
if (triclinic) domain->x2lamda(atomKK->nlocal);
|
||||
if (triclinic) domain->x2lamda(atom->nlocal);
|
||||
domain->pbc();
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
|
||||
domain->reset_box();
|
||||
comm->setup();
|
||||
if (neighbor->style) neighbor->setup_bins();
|
||||
|
||||
comm->exchange();
|
||||
|
||||
if (atomKK->sortfreq > 0) atomKK->sort();
|
||||
|
||||
if (atom->sortfreq > 0) atom->sort();
|
||||
comm->borders();
|
||||
|
||||
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
|
||||
domain->image_check();
|
||||
domain->box_too_small_check();
|
||||
modify->setup_pre_neighbor();
|
||||
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
neighbor->build(1);
|
||||
modify->setup_post_neighbor();
|
||||
neighbor->ncalls = 0;
|
||||
@ -144,7 +126,7 @@ void VerletKokkos::setup(int flag)
|
||||
}
|
||||
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
|
||||
|
||||
if (atomKK->molecular != Atom::ATOMIC) {
|
||||
if (atom->molecular != Atom::ATOMIC) {
|
||||
if (force->bond) {
|
||||
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
|
||||
force->bond->compute(eflag,vflag);
|
||||
@ -200,35 +182,21 @@ void VerletKokkos::setup_minimal(int flag)
|
||||
// acquire ghosts
|
||||
// build neighbor lists
|
||||
|
||||
lmp->kokkos->auto_sync = 1;
|
||||
|
||||
if (flag) {
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
modify->setup_pre_exchange();
|
||||
// debug
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
if (triclinic) domain->x2lamda(atomKK->nlocal);
|
||||
if (triclinic) domain->x2lamda(atom->nlocal);
|
||||
domain->pbc();
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
domain->reset_box();
|
||||
comm->setup();
|
||||
if (neighbor->style) neighbor->setup_bins();
|
||||
comm->exchange();
|
||||
comm->borders();
|
||||
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
|
||||
domain->image_check();
|
||||
domain->box_too_small_check();
|
||||
modify->setup_pre_neighbor();
|
||||
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
neighbor->build(1);
|
||||
modify->setup_post_neighbor();
|
||||
neighbor->ncalls = 0;
|
||||
@ -247,7 +215,7 @@ void VerletKokkos::setup_minimal(int flag)
|
||||
}
|
||||
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
|
||||
|
||||
if (atomKK->molecular != Atom::ATOMIC) {
|
||||
if (atom->molecular != Atom::ATOMIC) {
|
||||
if (force->bond) {
|
||||
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
|
||||
force->bond->compute(eflag,vflag);
|
||||
|
||||
@ -23,7 +23,7 @@ ARCHIVE = ar
|
||||
ARFLAGS = -rc
|
||||
SHLIBFLAGS = -shared
|
||||
KOKKOS_DEVICES = Cuda
|
||||
KOKKOS_ARCH = Kepler35
|
||||
KOKKOS_ARCH = Volta70
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# LAMMPS-specific settings, all OPTIONAL
|
||||
|
||||
Reference in New Issue
Block a user