diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst index a55c9568e3..5f42f53bd5 100644 --- a/doc/src/Commands_pair.rst +++ b/doc/src/Commands_pair.rst @@ -44,7 +44,7 @@ OPT. * :doc:`born/coul/wolf/cs (g) ` * :doc:`born/gauss ` * :doc:`bpm/spring ` - * :doc:`brownian (o) ` + * :doc:`brownian (ko) ` * :doc:`brownian/poly (o) ` * :doc:`buck (giko) ` * :doc:`buck/coul/cut (giko) ` diff --git a/doc/src/pair_brownian.rst b/doc/src/pair_brownian.rst index 9fec789ba0..a740952a5c 100644 --- a/doc/src/pair_brownian.rst +++ b/doc/src/pair_brownian.rst @@ -1,12 +1,13 @@ .. index:: pair_style brownian .. index:: pair_style brownian/omp +.. index:: pair_style brownian/kk .. index:: pair_style brownian/poly .. index:: pair_style brownian/poly/omp pair_style brownian command =========================== -Accelerator Variants: *brownian/omp* +Accelerator Variants: *brownian/omp*, *brownian/kk* pair_style brownian/poly command ================================ diff --git a/src/COLLOID/pair_brownian.cpp b/src/COLLOID/pair_brownian.cpp index 1b136cf703..6773900e44 100644 --- a/src/COLLOID/pair_brownian.cpp +++ b/src/COLLOID/pair_brownian.cpp @@ -54,6 +54,8 @@ PairBrownian::PairBrownian(LAMMPS *lmp) : Pair(lmp) PairBrownian::~PairBrownian() { + if (copymode) return; + if (allocated) { memory->destroy(setflag); memory->destroy(cutsq); diff --git a/src/COLLOID/pair_brownian.h b/src/COLLOID/pair_brownian.h index a6a08db699..0bb4e2188b 100644 --- a/src/COLLOID/pair_brownian.h +++ b/src/COLLOID/pair_brownian.h @@ -31,7 +31,7 @@ class PairBrownian : public Pair { void compute(int, int) override; void settings(int, char **) override; void coeff(int, char **) override; - double init_one(int, int) override; + virtual double init_one(int, int) override; void init_style() override; void write_restart(FILE *) override; void read_restart(FILE *) override; @@ -55,7 +55,7 @@ class PairBrownian : public Pair { class RanMars *random; void set_3_orthogonal_vectors(double *, double *, double *); - void allocate(); + virtual void allocate(); }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 31359d4e4a..ee2e8e61fe 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -277,6 +277,8 @@ action npair_trim_kokkos.h action pack_kokkos.h pack.h action pair_adp_kokkos.cpp pair_adp.cpp action pair_adp_kokkos.h pair_adp.h +action pair_brownian_kokkos.cpp pair_brownian.cpp +action pair_brownian_kokkos.h pair_brownian.h action pair_buck_coul_cut_kokkos.cpp action pair_buck_coul_cut_kokkos.h action pair_buck_coul_long_kokkos.cpp pair_buck_coul_long.cpp diff --git a/src/KOKKOS/compute_temp_deform_kokkos.cpp b/src/KOKKOS/compute_temp_deform_kokkos.cpp index 03aba5b10d..54acf58a21 100644 --- a/src/KOKKOS/compute_temp_deform_kokkos.cpp +++ b/src/KOKKOS/compute_temp_deform_kokkos.cpp @@ -190,10 +190,20 @@ void ComputeTempDeformKokkos::operator()(TagComputeTempDeformVector< } /* ---------------------------------------------------------------------- */ + template void ComputeTempDeformKokkos::remove_bias_all() { - atomKK->sync(execution_space,datamask_read); + remove_bias_all_kk(); + atomKK->sync(Host,V_MASK); +} + +/* ---------------------------------------------------------------------- */ + +template +void ComputeTempDeformKokkos::remove_bias_all_kk() +{ + atomKK->sync(execution_space,X_MASK|V_MASK); v = atomKK->k_v.view(); x = atomKK->k_x.view(); mask = atomKK->k_mask.view(); @@ -214,6 +224,8 @@ void ComputeTempDeformKokkos::remove_bias_all() copymode = 0; domainKK->lamda2x(nlocal); + + atomKK->modified(execution_space,V_MASK); } template @@ -230,18 +242,20 @@ void ComputeTempDeformKokkos::operator()(TagComputeTempDeformRemoveB } /* ---------------------------------------------------------------------- */ + template void ComputeTempDeformKokkos::restore_bias_all() { - atomKK->sync(execution_space,datamask_read); + atomKK->sync(execution_space,V_MASK); v = atomKK->k_v.view(); - x = atomKK->k_x.view(); mask = atomKK->k_mask.view(); int nlocal = atom->nlocal; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); copymode = 0; + + atomKK->modified(execution_space,V_MASK); } template diff --git a/src/KOKKOS/compute_temp_deform_kokkos.h b/src/KOKKOS/compute_temp_deform_kokkos.h index 2880314b55..b7584e6e14 100644 --- a/src/KOKKOS/compute_temp_deform_kokkos.h +++ b/src/KOKKOS/compute_temp_deform_kokkos.h @@ -69,6 +69,7 @@ class ComputeTempDeformKokkos: public ComputeTempDeform { double compute_scalar() override; void compute_vector() override; void remove_bias_all() override; + void remove_bias_all_kk() override; void restore_bias_all() override; template diff --git a/src/KOKKOS/fix_deform_kokkos.cpp b/src/KOKKOS/fix_deform_kokkos.cpp index af9c154876..9b279fa91d 100644 --- a/src/KOKKOS/fix_deform_kokkos.cpp +++ b/src/KOKKOS/fix_deform_kokkos.cpp @@ -55,7 +55,11 @@ void FixDeformKokkos::pre_exchange() void FixDeformKokkos::end_of_step() { - atomKK->sync(Host,ALL_MASK); + if (remapflag == Domain::X_REMAP && rfix.size() > 0) + atomKK->sync(Host,ALL_MASK); + FixDeform::end_of_step(); - atomKK->modified(Host,ALL_MASK); + + if (remapflag == Domain::X_REMAP && rfix.size() > 0) + atomKK->modified(Host,ALL_MASK); } diff --git a/src/KOKKOS/fix_langevin_kokkos.cpp b/src/KOKKOS/fix_langevin_kokkos.cpp index e60b1f0ec6..546f204de6 100644 --- a/src/KOKKOS/fix_langevin_kokkos.cpp +++ b/src/KOKKOS/fix_langevin_kokkos.cpp @@ -86,7 +86,6 @@ FixLangevinKokkos::FixLangevinKokkos(LAMMPS *lmp, int narg, char **a execution_space = ExecutionSpaceFromDevice::space; datamask_read = V_MASK | F_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK; datamask_modify = F_MASK; - } /* ---------------------------------------------------------------------- */ @@ -227,12 +226,16 @@ void FixLangevinKokkos::post_force(int /*vflag*/) // account for bias velocity if (tbiasflag == BIAS) { - atomKK->sync(temperature->execution_space,temperature->datamask_read); - temperature->compute_scalar(); - temperature->remove_bias_all(); // modifies velocities - // if temeprature compute is kokkosized host-device comm won't be needed - atomKK->modified(temperature->execution_space,temperature->datamask_modify); - atomKK->sync(execution_space,temperature->datamask_modify); + if (temperature->kokkosable) { + temperature->compute_scalar(); + temperature->remove_bias_all_kk(); + } else { + atomKK->sync(temperature->execution_space,temperature->datamask_read); + temperature->compute_scalar(); + temperature->remove_bias_all(); + atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); + } } // compute langevin force in parallel on the device @@ -526,10 +529,13 @@ void FixLangevinKokkos::post_force(int /*vflag*/) if (tbiasflag == BIAS) { - atomKK->sync(temperature->execution_space,temperature->datamask_read); - temperature->restore_bias_all(); // modifies velocities - atomKK->modified(temperature->execution_space,temperature->datamask_modify); - atomKK->sync(execution_space,temperature->datamask_modify); + if (temperature->kokkosable) temperature->restore_bias_all(); + else { + atomKK->sync(temperature->execution_space,temperature->datamask_read); + temperature->restore_bias_all(); + atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); + } } // set modify flags for the views modified in post_force functor diff --git a/src/KOKKOS/fix_nh_kokkos.cpp b/src/KOKKOS/fix_nh_kokkos.cpp index b3614488b5..ff67f63b0d 100644 --- a/src/KOKKOS/fix_nh_kokkos.cpp +++ b/src/KOKKOS/fix_nh_kokkos.cpp @@ -80,7 +80,11 @@ void FixNHKokkos::setup(int /*vflag*/) { // tdof needed by compute_temp_target() + atomKK->sync(temperature->execution_space,temperature->datamask_read); t_current = temperature->compute_scalar(); + atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); + tdof = temperature->dof; // t_target is needed by NPH and NPT in compute_scalar() @@ -105,6 +109,7 @@ void FixNHKokkos::setup(int /*vflag*/) atomKK->sync(temperature->execution_space,temperature->datamask_read); t0 = temperature->compute_scalar(); atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); if (t0 < EPSILON) error->all(FLERR,"Current temperature too close to zero, consider using ptemp keyword"); } @@ -117,6 +122,8 @@ void FixNHKokkos::setup(int /*vflag*/) atomKK->sync(temperature->execution_space,temperature->datamask_read); t_current = temperature->compute_scalar(); atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); + tdof = temperature->dof; if (pstat_flag) { @@ -194,9 +201,7 @@ void FixNHKokkos::initial_integrate(int /*vflag*/) if (pstat_flag) { atomKK->sync(temperature->execution_space,temperature->datamask_read); - atomKK->modified(temperature->execution_space,temperature->datamask_modify); - //atomKK->sync(pressure->execution_space,pressure->datamask_read); - //atomKK->modified(pressure->execution_space,pressure->datamask_modify); + atomKK->sync(pressure->execution_space,pressure->datamask_read); if (pstyle == ISO) { temperature->compute_scalar(); pressure->compute_scalar(); @@ -204,6 +209,10 @@ void FixNHKokkos::initial_integrate(int /*vflag*/) temperature->compute_vector(); pressure->compute_vector(); } + atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->modified(pressure->execution_space,pressure->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,pressure->datamask_modify); couple(); pressure->addstep(update->ntimestep+1); } @@ -250,6 +259,7 @@ void FixNHKokkos::final_integrate() atomKK->sync(temperature->execution_space,temperature->datamask_read); t_current = temperature->compute_scalar(); atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); } if (pstat_flag) nh_v_press(); @@ -260,15 +270,24 @@ void FixNHKokkos::final_integrate() atomKK->sync(temperature->execution_space,temperature->datamask_read); t_current = temperature->compute_scalar(); atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); tdof = temperature->dof; if (pstat_flag) { - //atomKK->sync(pressure->execution_space,pressure->datamask_read); - //atomKK->modified(pressure->execution_space,pressure->datamask_modify); - if (pstyle == ISO) pressure->compute_scalar(); - else { + if (pstyle == ISO) { + atomKK->sync(pressure->execution_space,pressure->datamask_read); + pressure->compute_scalar(); + atomKK->modified(pressure->execution_space,pressure->datamask_modify); + atomKK->sync(execution_space,pressure->datamask_modify); + } else { + atomKK->sync(temperature->execution_space,temperature->datamask_read); + atomKK->sync(pressure->execution_space,pressure->datamask_read); temperature->compute_vector(); pressure->compute_vector(); + atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->modified(pressure->execution_space,pressure->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,pressure->datamask_modify); } couple(); pressure->addstep(update->ntimestep+1); @@ -480,9 +499,13 @@ void FixNHKokkos::nh_v_press() factor[2] = exp(-dt4*(omega_dot[2]+mtk_term2)); if (which == BIAS) { - atomKK->sync(temperature->execution_space,temperature->datamask_read); - temperature->remove_bias_all(); - atomKK->modified(temperature->execution_space,temperature->datamask_modify); + if (temperature->kokkosable) temperature->remove_bias_all_kk(); + else { + atomKK->sync(temperature->execution_space,temperature->datamask_read); + temperature->remove_bias_all(); + atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); + } } atomKK->sync(execution_space,V_MASK | MASK_MASK); @@ -497,11 +520,14 @@ void FixNHKokkos::nh_v_press() atomKK->modified(execution_space,V_MASK); if (which == BIAS) { - atomKK->sync(temperature->execution_space,temperature->datamask_read); - temperature->restore_bias_all(); - atomKK->modified(temperature->execution_space,temperature->datamask_modify); + if (temperature->kokkosable) temperature->restore_bias_all(); + else { + atomKK->sync(temperature->execution_space,temperature->datamask_read); + temperature->restore_bias_all(); + atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); + } } - } template @@ -617,9 +643,13 @@ void FixNHKokkos::nh_v_temp() if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst; if (which == BIAS) { - atomKK->sync(temperature->execution_space,temperature->datamask_read); - temperature->remove_bias_all(); - atomKK->modified(temperature->execution_space,temperature->datamask_modify); + if (temperature->kokkosable) temperature->remove_bias_all_kk(); + else { + atomKK->sync(temperature->execution_space,temperature->datamask_read); + temperature->remove_bias_all(); + atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); + } } atomKK->sync(execution_space,V_MASK | MASK_MASK); @@ -631,9 +661,13 @@ void FixNHKokkos::nh_v_temp() atomKK->modified(execution_space,V_MASK); if (which == BIAS) { - atomKK->sync(temperature->execution_space,temperature->datamask_read); - temperature->restore_bias_all(); - atomKK->modified(temperature->execution_space,temperature->datamask_modify); + if (temperature->kokkosable) temperature->restore_bias_all(); + else { + atomKK->sync(temperature->execution_space,temperature->datamask_read); + temperature->restore_bias_all(); + atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); + } } } diff --git a/src/KOKKOS/fix_nvt_sllod_kokkos.cpp b/src/KOKKOS/fix_nvt_sllod_kokkos.cpp index ddcc0c728c..42f78270ad 100644 --- a/src/KOKKOS/fix_nvt_sllod_kokkos.cpp +++ b/src/KOKKOS/fix_nvt_sllod_kokkos.cpp @@ -68,6 +68,10 @@ FixNVTSllodKokkos::FixNVTSllodKokkos(LAMMPS *lmp, int narg, char **a this->modify->add_compute(fmt::format("{} {} temp/deform/kk",this->id_temp,this->group->names[this->igroup])); this->tcomputeflag = 1; this->nondeformbias = 0; + + this->execution_space = ExecutionSpaceFromDevice::space; + this->datamask_read = EMPTY_MASK; + this->datamask_modify = EMPTY_MASK; } /* ---------------------------------------------------------------------- */ @@ -114,6 +118,7 @@ void FixNVTSllodKokkos::nh_v_temp() atomKK->sync(this->temperature->execution_space,this->temperature->datamask_read); this->temperature->compute_scalar(); atomKK->modified(this->temperature->execution_space,this->temperature->datamask_modify); + atomKK->sync(this->execution_space,this->temperature->datamask_modify); } v = atomKK->k_v.view(); @@ -130,9 +135,13 @@ void FixNVTSllodKokkos::nh_v_temp() vdelu = typename AT::t_v_array(Kokkos::NoInit("nvt/sllod/kk:vdelu"), atomKK->nmax); if (!this->psllod_flag) { - atomKK->sync(this->temperature->execution_space,this->temperature->datamask_read); - this->temperature->remove_bias_all(); - atomKK->modified(this->temperature->execution_space,this->temperature->datamask_modify); + if (this->temperature->kokkosable) this->temperature->remove_bias_all_kk(); + else { + atomKK->sync(this->temperature->execution_space,this->temperature->datamask_read); + this->temperature->remove_bias_all(); + atomKK->modified(this->temperature->execution_space,this->temperature->datamask_modify); + atomKK->sync(this->execution_space,this->temperature->datamask_modify); + } } atomKK->sync(this->execution_space,V_MASK | MASK_MASK); @@ -142,9 +151,13 @@ void FixNVTSllodKokkos::nh_v_temp() this->copymode = 0; if (this->psllod_flag) { - atomKK->sync(this->temperature->execution_space,this->temperature->datamask_read); - this->temperature->remove_bias_all(); - atomKK->modified(this->temperature->execution_space,this->temperature->datamask_modify); + if (this->temperature->kokkosable) this->temperature->remove_bias_all_kk(); + else { + atomKK->sync(this->temperature->execution_space,this->temperature->datamask_read); + this->temperature->remove_bias_all(); + atomKK->modified(this->temperature->execution_space,this->temperature->datamask_modify); + atomKK->sync(this->execution_space,this->temperature->datamask_modify); + } } atomKK->sync(this->execution_space,V_MASK | MASK_MASK); @@ -155,9 +168,13 @@ void FixNVTSllodKokkos::nh_v_temp() atomKK->modified(this->execution_space,V_MASK); - atomKK->sync(this->temperature->execution_space,this->temperature->datamask_read); - this->temperature->restore_bias_all(); - atomKK->modified(this->temperature->execution_space,this->temperature->datamask_modify); + if (this->temperature->kokkosable) this->temperature->restore_bias_all(); + else { + atomKK->sync(this->temperature->execution_space,this->temperature->datamask_read); + this->temperature->restore_bias_all(); + atomKK->modified(this->temperature->execution_space,this->temperature->datamask_modify); + atomKK->sync(this->execution_space,this->temperature->datamask_modify); + } } template diff --git a/src/KOKKOS/fix_temp_berendsen_kokkos.cpp b/src/KOKKOS/fix_temp_berendsen_kokkos.cpp index 8aaf586194..32d04a58b3 100644 --- a/src/KOKKOS/fix_temp_berendsen_kokkos.cpp +++ b/src/KOKKOS/fix_temp_berendsen_kokkos.cpp @@ -97,10 +97,13 @@ void FixTempBerendsenKokkos::end_of_step() auto groupbit = this->groupbit; if (which == NOBIAS) { - atomKK->sync(temperature->execution_space,temperature->datamask_read); - temperature->remove_bias_all(); - atomKK->modified(temperature->execution_space,temperature->datamask_modify); - atomKK->sync(execution_space,temperature->datamask_modify); + if (temperature->kokkosable) temperature->remove_bias_all_kk(); + else { + atomKK->sync(temperature->execution_space,temperature->datamask_read); + temperature->remove_bias_all(); + atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); + } } atomKK->sync(execution_space,V_MASK|MASK_MASK); @@ -116,10 +119,13 @@ void FixTempBerendsenKokkos::end_of_step() atomKK->modified(execution_space,V_MASK); if (which == NOBIAS) { - atomKK->sync(temperature->execution_space,temperature->datamask_read); - temperature->restore_bias_all(); - atomKK->modified(temperature->execution_space,temperature->datamask_modify); - atomKK->sync(execution_space,temperature->datamask_modify); + if (temperature->kokkosable) temperature->restore_bias_all(); + else { + atomKK->sync(temperature->execution_space,temperature->datamask_read); + temperature->restore_bias_all(); + atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); + } } } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_temp_rescale_kokkos.cpp b/src/KOKKOS/fix_temp_rescale_kokkos.cpp index 5c295634e7..02ba4ff1e2 100644 --- a/src/KOKKOS/fix_temp_rescale_kokkos.cpp +++ b/src/KOKKOS/fix_temp_rescale_kokkos.cpp @@ -99,10 +99,13 @@ void FixTempRescaleKokkos::end_of_step() auto groupbit = this->groupbit; if (which == NOBIAS) { - atomKK->sync(temperature->execution_space,temperature->datamask_read); - temperature->remove_bias_all(); - atomKK->modified(temperature->execution_space,temperature->datamask_modify); - atomKK->sync(execution_space,temperature->datamask_modify); + if (temperature->kokkosable) temperature->remove_bias_all_kk(); + else { + atomKK->sync(temperature->execution_space,temperature->datamask_read); + temperature->remove_bias_all(); + atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); + } } atomKK->sync(execution_space,V_MASK|MASK_MASK); @@ -118,11 +121,13 @@ void FixTempRescaleKokkos::end_of_step() atomKK->modified(execution_space,V_MASK); if (which == NOBIAS) { - atomKK->sync(temperature->execution_space,temperature->datamask_read); - temperature->restore_bias_all(); - atomKK->modified(temperature->execution_space,temperature->datamask_modify); - atomKK->sync(execution_space,temperature->datamask_modify); - + if (temperature->kokkosable) temperature->restore_bias_all(); + else { + atomKK->sync(temperature->execution_space,temperature->datamask_read); + temperature->restore_bias_all(); + atomKK->modified(temperature->execution_space,temperature->datamask_modify); + atomKK->sync(execution_space,temperature->datamask_modify); + } } } } diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index b3edb0e6a0..00676c7bf8 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -633,15 +633,24 @@ void KokkosLMP::accelerator(int narg, char **arg) // set neighbor binsize, same as neigh_modify command force->newton = force->newton_pair = force->newton_bond = newtonflag; - - if (neigh_thread && newtonflag) - error->all(FLERR,"Must use KOKKOS package option 'newton off' with 'neigh/thread on'"); + newton_check(); neighbor->binsize_user = binsize; if (binsize <= 0.0) neighbor->binsizeflag = 0; else neighbor->binsizeflag = 1; } +/* ---------------------------------------------------------------------- */ + +void KokkosLMP::newton_check() +{ + if (neighflag == FULL && force->newton) + error->all(FLERR,"Must use 'newton off' with KOKKOS package option 'neigh full'"); + + if (neigh_thread && force->newton) + error->all(FLERR,"Must use 'newton off' with KOKKOS package option 'neigh/thread on'"); +} + /* ---------------------------------------------------------------------- called by Finish ------------------------------------------------------------------------- */ diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index 419de62dec..f88fb6e685 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -64,6 +64,7 @@ class KokkosLMP : protected Pointers { static void initialize(const Kokkos::InitializationSettings&, Error *); static void finalize(); void accelerator(int, char **); + void newton_check(); bigint neigh_count(int); template diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 44e9e355b9..4fec623c5d 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -1438,7 +1438,6 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP for (int k = 0; k < nstencil; k++) { const int jbin = ibin + stencil[k]; - if (ibin == jbin) continue; if (HalfNeigh && Newton && !Tri && (ibin == jbin)) continue; bincount_current = c_bincount[jbin]; diff --git a/src/KOKKOS/pair_brownian_kokkos.cpp b/src/KOKKOS/pair_brownian_kokkos.cpp new file mode 100644 index 0000000000..406ec033a5 --- /dev/null +++ b/src/KOKKOS/pair_brownian_kokkos.cpp @@ -0,0 +1,634 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "pair_brownian_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "domain_kokkos.h" +#include "error.h" +#include "fix.h" +#include "fix_wall.h" +#include "force.h" +#include "input.h" +#include "kokkos.h" +#include "math_const.h" +#include "math_special_kokkos.h" +#include "memory_kokkos.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "neighbor.h" +#include "respa.h" +#include "update.h" +#include "variable.h" + +#include + +using namespace LAMMPS_NS; +using namespace MathConst; +using namespace MathSpecialKokkos; + +// same as fix_wall.cpp + +enum { EDGE, CONSTANT, VARIABLE }; + +/* ---------------------------------------------------------------------- */ + +template +PairBrownianKokkos::PairBrownianKokkos(LAMMPS *lmp) : PairBrownian(lmp), + rand_pool(seed + comm->me) +{ + respa_enable = 0; + + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TORQUE_MASK | TYPE_MASK | VIRIAL_MASK | RADIUS_MASK; + datamask_modify = F_MASK | TORQUE_MASK | VIRIAL_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +PairBrownianKokkos::~PairBrownianKokkos() +{ + if (copymode) return; + + if (allocated) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_cut_inner,cut_inner); + memoryKK->destroy_kokkos(k_cutsq,cutsq); + } +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template +void PairBrownianKokkos::init_style() +{ + PairBrownian::init_style(); + + // error if rRESPA with inner levels + + if (update->whichflag == 1 && utils::strmatch(update->integrate_style,"^respa")) { + int respa = 0; + if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; + if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + if (respa) + error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle"); + } + + // adjust neighbor list request for KOKKOS + + neighflag = lmp->kokkos->neighflag; + auto request = neighbor->find_request(this); + request->set_kokkos_host(std::is_same_v && + !std::is_same_v); + request->set_kokkos_device(std::is_same_v); + if (neighflag == FULL) request->enable_full(); +} + +/* ---------------------------------------------------------------------- */ + +template +void PairBrownianKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + ev_init(eflag,vflag,0); + + // This section of code adjusts R0/RT0/RS0 if necessary due to changes + // in the volume fraction as a result of fix deform or moving walls + + double dims[3], wallcoord; + if (flagVF) // Flag for volume fraction corrections + if (flagdeform || flagwall == 2) { // Possible changes in volume fraction + if (flagdeform && !flagwall) + for (int j = 0; j < 3; j++) dims[j] = domain->prd[j]; + else if (flagwall == 2 || (flagdeform && flagwall == 1)) { + double wallhi[3], walllo[3]; + for (int j = 0; j < 3; j++) { + wallhi[j] = domain->prd[j]; + walllo[j] = 0; + } + for (int m = 0; m < wallfix->nwall; m++) { + int dim = wallfix->wallwhich[m] / 2; + int side = wallfix->wallwhich[m] % 2; + if (wallfix->xstyle[m] == VARIABLE) { + wallcoord = input->variable->compute_equal(wallfix->xindex[m]); + } else + wallcoord = wallfix->coord0[m]; + if (side == 0) + walllo[dim] = wallcoord; + else + wallhi[dim] = wallcoord; + } + for (int j = 0; j < 3; j++) dims[j] = wallhi[j] - walllo[j]; + } + double vol_T = dims[0] * dims[1] * dims[2]; + double vol_f = vol_P / vol_T; + if (flaglog == 0) { + R0 = 6 * MY_PI * mu * rad * (1.0 + 2.16 * vol_f); + RT0 = 8 * MY_PI * mu * cube(rad); + //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f); + } else { + R0 = 6 * MY_PI * mu * rad * (1.0 + 2.725 * vol_f - 6.583 * vol_f * vol_f); + RT0 = 8 * MY_PI * mu * cube(rad) * (1.0 + 0.749 * vol_f - 2.469 * vol_f * vol_f); + //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f); + } + } + + // scale factor for Brownian moments + + prethermostat = sqrt(24.0 * force->boltz * t_target / update->dt); + prethermostat *= sqrt(force->vxmu2f / force->ftm2v / force->mvv2e); + + // reallocate per-atom arrays if necessary + + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom"); + d_vatom = k_vatom.view(); + } + + atomKK->sync(execution_space,datamask_read); + k_cutsq.template sync(); + k_cut_inner.template sync(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK | TORQUE_MASK); + + x = atomKK->k_x.view(); + c_x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + torque = atomKK->k_torque.view(); + type = atomKK->k_type.view(); + radius = atomKK->k_radius.view(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + newton_pair = force->newton_pair; + vxmu2f = force->vxmu2f; + + // loop over neighbors of my atoms + + int inum = list->inum; + NeighListKokkos* k_list = static_cast*>(list); + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + + copymode = 1; + + EV_FLOAT ev; + + if (flagfld) { // FLAGFLD == 1 + if (vflag_either) { // VFLAG == 1 + if (neighflag == HALF) { + if (newton_pair) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } else if (neighflag == HALFTHREAD) { + if (newton_pair) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } else if (neighflag == FULL) { + if (newton_pair) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } + } else { // VFLAG==0 + if (neighflag == HALF) { + if (newton_pair) Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else if (neighflag == HALFTHREAD) { + if (newton_pair) Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else if (neighflag == FULL) { + if (newton_pair) Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } + } else { // FLAGFLD == 0 + if (evflag) { // VFLAG== 1 + if (neighflag == HALF) { + if (newton_pair) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } else if (neighflag == HALFTHREAD) { + if (newton_pair) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } else if (neighflag == FULL) { + if (newton_pair) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } + } else { // VFLAG == 0 + if (neighflag == HALF) { + if (newton_pair) Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else if (neighflag == HALFTHREAD) { + if (newton_pair) Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else if (neighflag == FULL) { + if (newton_pair) Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } + } + + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + copymode = 0; +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairBrownianKokkos::operator()(TagPairBrownianCompute, const int ii, EV_FLOAT &ev) const { + + // The f and torque arrays are atomic for Half/Thread neighbor style + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_f = f; + Kokkos::View::value,Kokkos::MemoryTraits::value> > a_torque = torque; + + rand_type rand_gen = rand_pool.get_state(); + + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type[i]; + const LMP_FLOAT radi = radius[i]; + const int jnum = d_numneigh[i]; + + LMP_FLOAT a_sq, a_sh, a_pu; + LMP_FLOAT xl[3], p1[3], p2[3], p3[3]; + + F_FLOAT fx_i = 0.0; + F_FLOAT fy_i = 0.0; + F_FLOAT fz_i = 0.0; + + F_FLOAT torquex_i = 0.0; + F_FLOAT torquey_i = 0.0; + F_FLOAT torquez_i = 0.0; + + if (FLAGFLD) { + fx_i = prethermostat * sqrt(R0) * (rand_gen.drand() - 0.5); + fy_i = prethermostat * sqrt(R0) * (rand_gen.drand() - 0.5); + fz_i = prethermostat * sqrt(R0) * (rand_gen.drand() - 0.5); + if (flaglog) { + torquex_i = prethermostat * sqrt(RT0) * (rand_gen.drand() - 0.5); + torquey_i = prethermostat * sqrt(RT0) * (rand_gen.drand() - 0.5); + torquez_i = prethermostat * sqrt(RT0) * (rand_gen.drand() - 0.5); + } + } + + if (flagHI) { + + for (int jj = 0; jj < jnum; jj++) { + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; + const int jtype = type[j]; + + if(rsq < d_cutsq(itype,jtype)) { + + const LMP_FLOAT r = sqrt(rsq); + + // scalar resistances a_sq and a_sh + + LMP_FLOAT h_sep = r - 2.0 * radi; + + // if less than minimum gap, use minimum gap instead + + if (r < d_cut_inner(itype,jtype)) h_sep = d_cut_inner(itype,jtype) - 2.0 * radi; + + // scale h_sep by radi + + h_sep = h_sep / radi; + + if (flaglog) { + a_sq = 6.0 * MY_PI * mu * radi * (1.0 / 4.0 / h_sep + 9.0 / 40.0 * log(1.0 / h_sep)); + a_sh = 6.0 * MY_PI * mu * radi * (1.0 / 6.0 * log(1.0 / h_sep)); + a_pu = 8.0 * MY_PI * mu * cube(radi) * (3.0 / 160.0 * log(1.0 / h_sep)); + } else + a_sq = 6.0 * MY_PI * mu * radi * (1.0 / 4.0 / h_sep); + + // generate the Pairwise Brownian Force: a_sq + + LMP_FLOAT Fbmag = prethermostat * sqrt(a_sq); + + // generate a random number + + LMP_FLOAT randr = rand_gen.drand() - 0.5; + + // contribution due to Brownian motion + + F_FLOAT fx = Fbmag * randr * delx / r; + F_FLOAT fy = Fbmag * randr * dely / r; + F_FLOAT fz = Fbmag * randr * delz / r; + + // add terms due to a_sh + + if (flaglog) { + + // generate two orthogonal vectors to the line of centers + + p1[0] = delx / r; + p1[1] = dely / r; + p1[2] = delz / r; + set_3_orthogonal_vectors(p1, p2, p3); + + // magnitude + + Fbmag = prethermostat * sqrt(a_sh); + + // force in each of the two directions + + randr = rand_gen.drand() - 0.5; + + fx += Fbmag * randr * p2[0]; + fy += Fbmag * randr * p2[1]; + fz += Fbmag * randr * p2[2]; + + randr = rand_gen.drand() - 0.5; + + fx += Fbmag * randr * p3[0]; + fy += Fbmag * randr * p3[1]; + fz += Fbmag * randr * p3[2]; + } + + // scale forces to appropriate units + + fx = vxmu2f * fx; + fy = vxmu2f * fy; + fz = vxmu2f * fz; + + // sum to total force + + fx_i -= fx; + fy_i -= fy; + fz_i -= fz; + + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) { + a_f(j,0) += fx; + a_f(j,1) += fy; + a_f(j,2) += fz; + } + + // torque due to the Brownian Force + + if (flaglog) { + + // location of the point of closest approach on I from its center + + xl[0] = -delx / r * radi; + xl[1] = -dely / r * radi; + xl[2] = -delz / r * radi; + + // torque = xl_cross_F + + F_FLOAT tx = xl[1] * fz - xl[2] * fy; + F_FLOAT ty = xl[2] * fx - xl[0] * fz; + F_FLOAT tz = xl[0] * fy - xl[1] * fx; + + // torque is same on both particles + + torquex_i -= tx; + torquey_i -= ty; + torquez_i -= tz; + + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) { + a_torque(j,0) -= tx; + a_torque(j,1) -= ty; + a_torque(j,2) -= tz; + } + + // torque due to a_pu + + Fbmag = prethermostat * sqrt(a_pu); + + // force in each direction + + randr = rand_gen.drand() - 0.5; + + tx = Fbmag * randr * p2[0]; + ty = Fbmag * randr * p2[1]; + tz = Fbmag * randr * p2[2]; + + randr = rand_gen.drand() - 0.5; + + tx += Fbmag * randr * p3[0]; + ty += Fbmag * randr * p3[1]; + tz += Fbmag * randr * p3[2]; + + // torque has opposite sign on two particles + + torquex_i -= tx; + torquey_i -= ty; + torquez_i -= tz; + + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) { + a_torque(j,0) += tx; + a_torque(j,1) += ty; + a_torque(j,2) += tz; + } + } + + if (VFLAG) + ev_tally_xyz(ev, i, j, -fx, -fy, -fz, delx, dely, delz); + } + } + + } // if(flagHI) + + rand_pool.free_state(rand_gen); + + a_f(i,0) += fx_i; + a_f(i,1) += fy_i; + a_f(i,2) += fz_i; + a_torque(i,0) += torquex_i; + a_torque(i,1) += torquey_i; + a_torque(i,2) += torquez_i; +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairBrownianKokkos::operator()(TagPairBrownianCompute, const int ii) const { + EV_FLOAT ev; + this->template operator()(TagPairBrownianCompute(), ii, ev); +} + + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairBrownianKokkos::ev_tally_xyz(EV_FLOAT & ev, int i, int j, + F_FLOAT fx, F_FLOAT fy, F_FLOAT fz, + X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const +{ + Kokkos::View::value,Kokkos::MemoryTraits::value> > v_vatom = k_vatom.view(); + + const F_FLOAT v0 = delx*fx; + const F_FLOAT v1 = dely*fy; + const F_FLOAT v2 = delz*fz; + const F_FLOAT v3 = delx*fy; + const F_FLOAT v4 = delx*fz; + const F_FLOAT v5 = dely*fz; + + if (vflag_global) { + if (NEIGHFLAG != FULL) { + if (NEWTON_PAIR) { // neigh half, newton on + ev.v[0] += v0; + ev.v[1] += v1; + ev.v[2] += v2; + ev.v[3] += v3; + ev.v[4] += v4; + ev.v[5] += v5; + } else { // neigh half, newton off + if (i < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + if (j < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + } else { //neigh full + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + + if (vflag_atom) { + + if (NEIGHFLAG == FULL || NEWTON_PAIR || i < nlocal) { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; + } + if (NEIGHFLAG != FULL && (NEWTON_PAIR || j < nlocal)) { + v_vatom(j,0) += 0.5*v0; + v_vatom(j,1) += 0.5*v1; + v_vatom(j,2) += 0.5*v2; + v_vatom(j,3) += 0.5*v3; + v_vatom(j,4) += 0.5*v4; + v_vatom(j,5) += 0.5*v5; + } + } +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template +void PairBrownianKokkos::allocate() +{ + PairBrownian::allocate(); + + int n = atom->ntypes; + + memory->destroy(cutsq); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + d_cutsq = k_cutsq.template view(); + + memory->destroy(cut_inner); + memoryKK->create_kokkos(k_cut_inner,cut_inner,n+1,n+1,"pair:cut_inner"); + d_cut_inner = k_cut_inner.template view(); +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +template +void PairBrownianKokkos::settings(int narg, char **arg) +{ + if (narg != 7 && narg != 9) error->all(FLERR, "Illegal pair_style command"); + + PairBrownian::settings(narg,arg); +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +template +double PairBrownianKokkos::init_one(int i, int j) +{ + double cutone = PairBrownian::init_one(i,j); + double cutinnerm = cut_inner[i][j]; + + k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone; + k_cutsq.template modify(); + + k_cut_inner.h_view(i,j) = k_cut_inner.h_view(j,i) = cutinnerm; + k_cut_inner.template modify(); + + return cutone; +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +template +void PairBrownianKokkos::coeff(int narg, char **arg) +{ + PairBrownian::coeff(narg,arg); +} + +namespace LAMMPS_NS { +template class PairBrownianKokkos; +#ifdef LMP_KOKKOS_GPU +template class PairBrownianKokkos; +#endif +} + diff --git a/src/KOKKOS/pair_brownian_kokkos.h b/src/KOKKOS/pair_brownian_kokkos.h new file mode 100644 index 0000000000..2247fd7ba0 --- /dev/null +++ b/src/KOKKOS/pair_brownian_kokkos.h @@ -0,0 +1,160 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS +// clang-format off +PairStyle(brownian/kk,PairBrownianKokkos); +PairStyle(brownian/kk/device,PairBrownianKokkos); +PairStyle(brownian/kk/host,PairBrownianKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_PAIR_BROWNIAN_KOKKOS_H +#define LMP_PAIR_BROWNIAN_KOKKOS_H + +#include "pair_brownian.h" +#include "pair_kokkos.h" +#include "kokkos_type.h" +#include "kokkos_base.h" +#include "Kokkos_Random.hpp" +#include "comm_kokkos.h" + +namespace LAMMPS_NS { + +template +struct TagPairBrownianCompute {}; + +template +class PairBrownianKokkos : public PairBrownian, public KokkosBase { + public: + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; + typedef DeviceType device_type; + typedef ArrayTypes AT; + typedef EV_FLOAT value_type; + + PairBrownianKokkos(class LAMMPS *); + ~PairBrownianKokkos() override; + void compute(int, int) override; + void coeff(int, char **) override; + void settings(int, char **) override; + void init_style() override; + double init_one(int, int) override; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairBrownianCompute, const int, EV_FLOAT &ev) const; + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairBrownianCompute, const int) const; + + template + KOKKOS_INLINE_FUNCTION + void ev_tally_xyz(EV_FLOAT &ev, int i, int j, + F_FLOAT fx, F_FLOAT fy, F_FLOAT fz, + X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const; + + protected: + typename AT::t_x_array_randomread x; + typename AT::t_x_array c_x; + typename AT::t_f_array f; + typename AT::t_f_array torque; + typename AT::t_int_1d_randomread type; + typename AT::t_float_1d_randomread radius; + + DAT::tdual_virial_array k_vatom; + typename AT::t_virial_array d_vatom; + + typename AT::t_neighbors_2d d_neighbors; + typename AT::t_int_1d_randomread d_ilist; + typename AT::t_int_1d_randomread d_numneigh; + + int newton_pair; + double special_lj[4]; + + typename AT::tdual_ffloat_2d k_cutsq; + typename AT::t_ffloat_2d d_cutsq; + typename AT::tdual_ffloat_2d k_cut_inner; + typename AT::t_ffloat_2d d_cut_inner; + + int neighflag; + int nlocal,nall,eflag,vflag; + LMP_FLOAT vxmu2f; + + LMP_FLOAT prethermostat; + + void allocate() override; + + KOKKOS_INLINE_FUNCTION + void set_3_orthogonal_vectors(const double p1[3], double * const p2, double * const p3) const { + double norm; + int ix, iy, iz; + + // find the index of maximum magnitude and store it in iz + + if (fabs(p1[0]) > fabs(p1[1])) { + iz = 0; + ix = 1; + iy = 2; + } else { + iz = 1; + ix = 2; + iy = 0; + } + + if (iz == 0) { + if (fabs(p1[0]) < fabs(p1[2])) { + iz = 2; + ix = 0; + iy = 1; + } + } else { + if (fabs(p1[1]) < fabs(p1[2])) { + iz = 2; + ix = 0; + iy = 1; + } + } + + // set p2 arbitrarily such that it's orthogonal to p1 + + p2[ix] = 1.0; + p2[iy] = 1.0; + p2[iz] = -(p1[ix] * p2[ix] + p1[iy] * p2[iy]) / p1[iz]; + + // normalize p2 + + norm = sqrt(p2[0] * p2[0] + p2[1] * p2[1] + p2[2] * p2[2]); + + p2[0] = p2[0] / norm; + p2[1] = p2[1] / norm; + p2[2] = p2[2] / norm; + + // Set p3 by taking the cross product p3=p2xp1 + + p3[0] = p1[1] * p2[2] - p1[2] * p2[1]; + p3[1] = p1[2] * p2[0] - p1[0] * p2[2]; + p3[2] = p1[0] * p2[1] - p1[1] * p2[0]; + }; + + friend void pair_virial_fdotr_compute(PairBrownianKokkos*); + + Kokkos::Random_XorShift64_Pool rand_pool; + typedef typename Kokkos::Random_XorShift64_Pool::generator_type rand_type; +}; + +} + +#endif +#endif + diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp index 35d779f36f..ef797a797f 100644 --- a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp @@ -168,13 +168,7 @@ void PairGranHookeHistoryKokkos::compute(int eflag_in, int vflag_in) if (neighflag == HALF) { if (force->newton_pair) { - if (vflag_atom) { - if (shearupdate) { - Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); - } else { - Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); - } - } else if (vflag_global) { + if (vflag_either) { if (shearupdate) { Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this, ev); } else { @@ -188,13 +182,7 @@ void PairGranHookeHistoryKokkos::compute(int eflag_in, int vflag_in) } } } else { - if (vflag_atom) { - if (shearupdate) { - Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); - } else { - Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); - } - } else if (vflag_global) { + if (vflag_either) { if (shearupdate) { Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this, ev); } else { @@ -210,13 +198,7 @@ void PairGranHookeHistoryKokkos::compute(int eflag_in, int vflag_in) } } else { // HALFTHREAD if (force->newton_pair) { - if (vflag_atom) { - if (shearupdate) { - Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); - } else { - Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); - } - } else if (vflag_global) { + if (vflag_either) { if (shearupdate) { Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this, ev); } else { @@ -230,13 +212,7 @@ void PairGranHookeHistoryKokkos::compute(int eflag_in, int vflag_in) } } } else { - if (vflag_atom) { - if (shearupdate) { - Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); - } else { - Kokkos::parallel_for(Kokkos::RangePolicy>(0,inum),*this); - } - } else if (vflag_global) { + if (vflag_either) { if (shearupdate) { Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,inum),*this, ev); } else { @@ -277,9 +253,9 @@ void PairGranHookeHistoryKokkos::compute(int eflag_in, int vflag_in) } template -template +template KOKKOS_INLINE_FUNCTION -void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryCompute, const int ii, EV_FLOAT &ev) const { +void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryCompute, const int ii, EV_FLOAT &ev) const { // The f and torque arrays are atomic for Half/Thread neighbor style Kokkos::View::value,Kokkos::MemoryTraits::value> > a_f = f; @@ -466,10 +442,8 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC a_torque(j,2) -= jrad*tor3; } - if (EVFLAG == 2) - ev_tally_xyz_atom(ev, i, j, fx_i, fy_i, fz_i, delx, dely, delz); - if (EVFLAG == 1) - ev_tally_xyz(ev, i, j, fx_i, fy_i, fz_i, delx, dely, delz); + if (VFLAG) + ev_tally_xyz(ev, i, j, fx, fy, fz, delx, dely, delz); } a_f(i,0) += fx_i; @@ -481,89 +455,75 @@ void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryC } template -template +template KOKKOS_INLINE_FUNCTION -void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryCompute, const int ii) const { +void PairGranHookeHistoryKokkos::operator()(TagPairGranHookeHistoryCompute, const int ii) const { EV_FLOAT ev; - this->template operator()(TagPairGranHookeHistoryCompute(), ii, ev); -} - -template -template -KOKKOS_INLINE_FUNCTION -void PairGranHookeHistoryKokkos::ev_tally_xyz(EV_FLOAT &ev, int i, int j, - F_FLOAT fx, F_FLOAT fy, F_FLOAT fz, - X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const -{ - F_FLOAT v[6]; - - v[0] = delx*fx; - v[1] = dely*fy; - v[2] = delz*fz; - v[3] = delx*fy; - v[4] = delx*fz; - v[5] = dely*fz; - - if (NEWTON_PAIR) { - ev.v[0] += v[0]; - ev.v[1] += v[1]; - ev.v[2] += v[2]; - ev.v[3] += v[3]; - ev.v[4] += v[4]; - ev.v[5] += v[5]; - } else { - if (i < nlocal) { - ev.v[0] += 0.5*v[0]; - ev.v[1] += 0.5*v[1]; - ev.v[2] += 0.5*v[2]; - ev.v[3] += 0.5*v[3]; - ev.v[4] += 0.5*v[4]; - ev.v[5] += 0.5*v[5]; - } - if (j < nlocal) { - ev.v[0] += 0.5*v[0]; - ev.v[1] += 0.5*v[1]; - ev.v[2] += 0.5*v[2]; - ev.v[3] += 0.5*v[3]; - ev.v[4] += 0.5*v[4]; - ev.v[5] += 0.5*v[5]; - } - } + this->template operator()(TagPairGranHookeHistoryCompute(), ii, ev); } template template KOKKOS_INLINE_FUNCTION -void PairGranHookeHistoryKokkos::ev_tally_xyz_atom(EV_FLOAT & /*ev*/, int i, int j, - F_FLOAT fx, F_FLOAT fy, F_FLOAT fz, - X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const +void PairGranHookeHistoryKokkos::ev_tally_xyz(EV_FLOAT &ev, int i, int j, + F_FLOAT fx, F_FLOAT fy, F_FLOAT fz, + X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const { Kokkos::View::value,Kokkos::MemoryTraits::value> > v_vatom = k_vatom.view(); - F_FLOAT v[6]; + const F_FLOAT v0 = delx*fx; + const F_FLOAT v1 = dely*fy; + const F_FLOAT v2 = delz*fz; + const F_FLOAT v3 = delx*fy; + const F_FLOAT v4 = delx*fz; + const F_FLOAT v5 = dely*fz; - v[0] = delx*fx; - v[1] = dely*fy; - v[2] = delz*fz; - v[3] = delx*fy; - v[4] = delx*fz; - v[5] = dely*fz; - - if (NEWTON_PAIR || i < nlocal) { - v_vatom(i,0) += 0.5*v[0]; - v_vatom(i,1) += 0.5*v[1]; - v_vatom(i,2) += 0.5*v[2]; - v_vatom(i,3) += 0.5*v[3]; - v_vatom(i,4) += 0.5*v[4]; - v_vatom(i,5) += 0.5*v[5]; + if (vflag_global) { + if (NEWTON_PAIR) { // neigh half, newton on + ev.v[0] += v0; + ev.v[1] += v1; + ev.v[2] += v2; + ev.v[3] += v3; + ev.v[4] += v4; + ev.v[5] += v5; + } else { // neigh half, newton off + if (i < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + if (j < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } } - if (NEWTON_PAIR || j < nlocal) { - v_vatom(j,0) += 0.5*v[0]; - v_vatom(j,1) += 0.5*v[1]; - v_vatom(j,2) += 0.5*v[2]; - v_vatom(j,3) += 0.5*v[3]; - v_vatom(j,4) += 0.5*v[4]; - v_vatom(j,5) += 0.5*v[5]; + + if (vflag_atom) { + + if (NEWTON_PAIR || i < nlocal) { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; + } + if (NEWTON_PAIR || j < nlocal) { + v_vatom(j,0) += 0.5*v0; + v_vatom(j,1) += 0.5*v1; + v_vatom(j,2) += 0.5*v2; + v_vatom(j,3) += 0.5*v3; + v_vatom(j,4) += 0.5*v4; + v_vatom(j,5) += 0.5*v5; + } } } diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.h b/src/KOKKOS/pair_gran_hooke_history_kokkos.h index 4f98b00f2a..d2bd1d96d5 100644 --- a/src/KOKKOS/pair_gran_hooke_history_kokkos.h +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.h @@ -32,7 +32,7 @@ namespace LAMMPS_NS { template class FixNeighHistoryKokkos; -template +template struct TagPairGranHookeHistoryCompute {}; template @@ -47,23 +47,18 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory { void compute(int, int) override; void init_style() override; - template + template KOKKOS_INLINE_FUNCTION - void operator()(TagPairGranHookeHistoryCompute, const int, EV_FLOAT &ev) const; - template + void operator()(TagPairGranHookeHistoryCompute, const int, EV_FLOAT &ev) const; + template KOKKOS_INLINE_FUNCTION - void operator()(TagPairGranHookeHistoryCompute, const int) const; + void operator()(TagPairGranHookeHistoryCompute, const int) const; - template + template KOKKOS_INLINE_FUNCTION void ev_tally_xyz(EV_FLOAT &ev, int i, int j, F_FLOAT fx, F_FLOAT fy, F_FLOAT fz, X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const; - template - KOKKOS_INLINE_FUNCTION - void ev_tally_xyz_atom(EV_FLOAT &ev, int i, int j, - F_FLOAT fx, F_FLOAT fy, F_FLOAT fz, - X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const; protected: typename AT::t_x_array_randomread x; diff --git a/src/accelerator_kokkos.h b/src/accelerator_kokkos.h index 36a376bff8..dec52b2363 100644 --- a/src/accelerator_kokkos.h +++ b/src/accelerator_kokkos.h @@ -59,6 +59,7 @@ class KokkosLMP { void accelerator(int, char **) {} int neigh_list_kokkos(int) { return 0; } int neigh_count(int) { return 0; } + void newton_check() {}; }; class AtomKokkos : public Atom { diff --git a/src/comm.cpp b/src/comm.cpp index 02999fd541..1b1546f893 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -75,6 +75,7 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp) maxexchange = maxexchange_atom = maxexchange_fix = 0; maxexchange_fix_dynamic = 0; bufextra = BUFEXTRA; + bufextra_max = bufextra; grid2proc = nullptr; xsplit = ysplit = zsplit = nullptr; diff --git a/src/comm.h b/src/comm.h index fde4c3b81f..8515866dff 100644 --- a/src/comm.h +++ b/src/comm.h @@ -140,6 +140,7 @@ class Comm : protected Pointers { int maxexchange_fix; // static contribution to maxexchange from Fixes int maxexchange_fix_dynamic; // 1 if a fix has a dynamic contribution int bufextra; // augment send buf size for an exchange atom + int bufextra_max; int gridflag; // option for creating 3d grid int mapflag; // option for mapping procs to 3d grid diff --git a/src/comm_brick.cpp b/src/comm_brick.cpp index cf38271029..c75b183584 100644 --- a/src/comm_brick.cpp +++ b/src/comm_brick.cpp @@ -133,9 +133,11 @@ void CommBrick::init() { Comm::init(); - int bufextra_old = bufextra; init_exchange(); - if (bufextra > bufextra_old) grow_send(maxsend+bufextra,2); + if (bufextra > bufextra_max) { + grow_send(maxsend+bufextra,2); + bufextra_max = bufextra; + } // memory for multi style communication // allocate in setup @@ -672,9 +674,11 @@ void CommBrick::exchange() // only need to reset if a fix can dynamically add to size of single atom if (maxexchange_fix_dynamic) { - int bufextra_old = bufextra; init_exchange(); - if (bufextra > bufextra_old) grow_send(maxsend+bufextra,2); + if (bufextra > bufextra_max) { + grow_send(maxsend+bufextra,2); + bufextra_max = bufextra; + } } // subbox bounds for orthogonal or triclinic diff --git a/src/comm_tiled.cpp b/src/comm_tiled.cpp index b864e0523d..f4657c4790 100644 --- a/src/comm_tiled.cpp +++ b/src/comm_tiled.cpp @@ -943,9 +943,11 @@ void CommTiled::exchange() // only need to reset if a fix can dynamically add to size of single atom if (maxexchange_fix_dynamic) { - int bufextra_old = bufextra; init_exchange(); - if (bufextra > bufextra_old) grow_send(maxsend+bufextra,2); + if (bufextra > bufextra_max) { + grow_send(maxsend+bufextra,2); + bufextra = bufextra_max; + } } // domain properties used in exchange method and methods it calls diff --git a/src/compute.h b/src/compute.h index 6956c3ae99..50c69d8d01 100644 --- a/src/compute.h +++ b/src/compute.h @@ -144,6 +144,7 @@ class Compute : protected Pointers { virtual void remove_bias(int, double *) {} virtual void remove_bias_thr(int, double *, double *) {} virtual void remove_bias_all() {} + virtual void remove_bias_all_kk() {} virtual void reapply_bias_all() {} virtual void restore_bias(int, double *) {} virtual void restore_bias_thr(int, double *, double *) {} diff --git a/src/compute_temp_sphere.cpp b/src/compute_temp_sphere.cpp index 2ee067abcb..65c2cdf3cc 100644 --- a/src/compute_temp_sphere.cpp +++ b/src/compute_temp_sphere.cpp @@ -14,6 +14,7 @@ #include "compute_temp_sphere.h" #include "atom.h" +#include "atom_masks.h" #include "domain.h" #include "error.h" #include "force.h" @@ -76,6 +77,8 @@ ComputeTempSphere::ComputeTempSphere(LAMMPS *lmp, int narg, char **arg) : if (!atom->omega_flag) error->all(FLERR, "Compute temp/sphere requires atom attribute omega"); if (!atom->radius_flag) error->all(FLERR, "Compute temp/sphere requires atom attribute radius"); + + datamask_modify = ALL_MASK & ~X_MASK; } /* ---------------------------------------------------------------------- */ diff --git a/src/fix.h b/src/fix.h index 594fbb51bf..7609caf5fe 100644 --- a/src/fix.h +++ b/src/fix.h @@ -216,17 +216,17 @@ class Fix : protected Pointers { virtual int pack_reverse_comm(int, int, double *) { return 0; } virtual void unpack_reverse_comm(int, int *, double *) {} - virtual void reset_grid(){}; + virtual void reset_grid() {}; - virtual void pack_forward_grid(int, void *, int, int *){}; - virtual void unpack_forward_grid(int, void *, int, int *){}; - virtual void pack_reverse_grid(int, void *, int, int *){}; - virtual void unpack_reverse_grid(int, void *, int, int *){}; - virtual void pack_remap_grid(int, void *, int, int *){}; - virtual void unpack_remap_grid(int, void *, int, int *){}; + virtual void pack_forward_grid(int, void *, int, int *) {}; + virtual void unpack_forward_grid(int, void *, int, int *) {}; + virtual void pack_reverse_grid(int, void *, int, int *) {}; + virtual void unpack_reverse_grid(int, void *, int, int *) {}; + virtual void pack_remap_grid(int, void *, int, int *) {}; + virtual void unpack_remap_grid(int, void *, int, int *) {}; virtual int unpack_read_grid(int, char *) { return 0; }; - virtual void pack_write_grid(int, void *){}; - virtual void unpack_write_grid(int, void *, int *){}; + virtual void pack_write_grid(int, void *) {}; + virtual void unpack_write_grid(int, void *, int *) {}; virtual int get_grid_by_name(const std::string &, int &) { return -1; }; virtual void *get_grid_by_index(int) { return nullptr; }; diff --git a/src/fix_balance.h b/src/fix_balance.h index a319710ac6..1d53451946 100644 --- a/src/fix_balance.h +++ b/src/fix_balance.h @@ -43,9 +43,9 @@ class FixBalance : public Fix { int nevery, lbstyle, nitermax; double thresh, stopthresh; std::string bstr; - int wtflag; // 1 for weighted balancing - int sortflag; // 1 for sorting comm messages - int reportonly; // 1 if skipping rebalancing and only computing imbalance + int wtflag; // 1 for weighted balancing + int sortflag; // 1 for sorting comm messages + int reportonly; // 1 if skipping rebalancing and only computing imbalance double imbnow; // current imbalance factor double imbprev; // imbalance factor before last rebalancing @@ -53,7 +53,7 @@ class FixBalance : public Fix { double maxloadperproc; // max load on any processor int itercount; // iteration count of last call to Balance int pending; - bigint lastbalance; // last timestep balancing was attempted + bigint lastbalance; // last timestep balancing was attempted class Balance *balance; class Irregular *irregular; diff --git a/src/fix_bond_history.h b/src/fix_bond_history.h index 377685ea84..7e8c998b72 100644 --- a/src/fix_bond_history.h +++ b/src/fix_bond_history.h @@ -62,7 +62,7 @@ class FixBondHistory : public Fix { // to enable quick look up std::map, std::vector> cached_histories; - int *setflag; // Set by BondBPM, which bond types are used + int *setflag; // Set by BondBPM, which bond types are used double **bondstore; int stored_flag; int ndata; diff --git a/src/input.cpp b/src/input.cpp index 38c38ca2ee..25211ba848 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -1687,6 +1687,8 @@ void Input::newton() if (newton_pair || newton_bond) force->newton = 1; else force->newton = 0; + + if (lmp->kokkos) lmp->kokkos->newton_check(); } /* ---------------------------------------------------------------------- */