Merge branch 'develop' into collected-small-changes

This commit is contained in:
Axel Kohlmeyer
2025-01-21 20:06:50 -05:00
61 changed files with 900 additions and 624 deletions

81
.github/workflows/unittest-arm64.yml vendored Normal file
View File

@ -0,0 +1,81 @@
# GitHub action to build LAMMPS on Linux with ARM64 and run standard unit tests
name: "Unittest for Linux on ARM64"
on:
push:
branches: [develop]
workflow_dispatch:
concurrency:
group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{github.event_name == 'pull_request'}}
jobs:
build:
name: Linux ARM64 Unit Test
if: ${{ github.repository == 'lammps/lammps' }}
runs-on: ubuntu-22.04-arm
env:
CCACHE_DIR: ${{ github.workspace }}/.ccache
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 2
- name: Install extra packages
run: |
sudo apt-get update
sudo apt-get install -y ccache \
libeigen3-dev \
libcurl4-openssl-dev \
mold \
ninja-build \
python3-dev
- name: Create Build Environment
run: mkdir build
- name: Set up ccache
uses: actions/cache@v4
with:
path: ${{ env.CCACHE_DIR }}
key: linux-unit-ccache-${{ github.sha }}
restore-keys: linux-unit-ccache-
- name: Building LAMMPS via CMake
shell: bash
run: |
ccache -z
python3 -m venv linuxenv
source linuxenv/bin/activate
python3 -m pip install numpy
python3 -m pip install pyyaml
cmake -S cmake -B build \
-C cmake/presets/gcc.cmake \
-C cmake/presets/most.cmake \
-D CMAKE_CXX_COMPILER_LAUNCHER=ccache \
-D CMAKE_C_COMPILER_LAUNCHER=ccache \
-D BUILD_SHARED_LIBS=on \
-D DOWNLOAD_POTENTIALS=off \
-D ENABLE_TESTING=on \
-D MLIAP_ENABLE_ACE=on \
-D MLIAP_ENABLE_PYTHON=off \
-D PKG_MANIFOLD=on \
-D PKG_ML-PACE=on \
-D PKG_ML-RANN=on \
-D PKG_RHEO=on \
-D PKG_PTM=on \
-D PKG_PYTHON=on \
-D PKG_QTB=on \
-D PKG_SMTBQ=on \
-G Ninja
cmake --build build
ccache -s
- name: Run Tests
working-directory: build
shell: bash
run: ctest -V -LE unstable

View File

@ -117,7 +117,6 @@ set(KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/atom_vec_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/comm_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/comm_tiled_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/group_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/min_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/min_linesearch_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/neighbor_kokkos.cpp

View File

@ -230,7 +230,6 @@ action fix_wall_region_kokkos.cpp
action fix_wall_region_kokkos.h
action grid3d_kokkos.cpp fft3d.h
action grid3d_kokkos.h fft3d.h
action group_kokkos.cpp
action group_kokkos.h
action improper_class2_kokkos.cpp improper_class2.cpp
action improper_class2_kokkos.h improper_class2.h

View File

@ -72,14 +72,14 @@ void AngleHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
// reallocate per-atom arrays if necessary
if (eflag_atom) {
if(k_eatom.extent(0) < maxeatom) {
if ((int)k_eatom.extent(0) < maxeatom) {
memoryKK->destroy_kokkos(k_eatom,eatom);
memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"angle:eatom");
d_eatom = k_eatom.template view<DeviceType>();
} else Kokkos::deep_copy(d_eatom,0.0);
}
if (vflag_atom) {
if(k_vatom.extent(0) < maxvatom) {
if ((int)k_vatom.extent(0) < maxvatom) {
memoryKK->destroy_kokkos(k_vatom,vatom);
memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"angle:vatom");
d_vatom = k_vatom.template view<DeviceType>();

View File

@ -76,7 +76,7 @@ void AngleHybridKokkos::compute(int eflag, int vflag)
Kokkos::parallel_for(nanglelist_orig,LAMMPS_LAMBDA(int i) {
const int m = d_map[d_anglelist_orig(i,3)];
if (m >= 0) Kokkos::atomic_increment(&d_nanglelist[m]);
if (m >= 0) Kokkos::atomic_inc(&d_nanglelist[m]);
});
k_nanglelist.modify_device();
@ -87,7 +87,7 @@ void AngleHybridKokkos::compute(int eflag, int vflag)
if (h_nanglelist[m] > maxangle_all)
maxangle_all = h_nanglelist[m] + EXTRA;
if (k_anglelist.d_view.extent(1) < maxangle_all)
if ((int)k_anglelist.d_view.extent(1) < maxangle_all)
MemKK::realloc_kokkos(k_anglelist, "angle_hybrid:anglelist", nstyles, maxangle_all, 4);
auto d_anglelist = k_anglelist.d_view;

View File

@ -67,14 +67,14 @@ void BondHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
// reallocate per-atom arrays if necessary
if (eflag_atom) {
if (k_eatom.extent(0) < maxeatom) {
if ((int)k_eatom.extent(0) < maxeatom) {
memoryKK->destroy_kokkos(k_eatom,eatom);
memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom");
d_eatom = k_eatom.template view<KKDeviceType>();
} else Kokkos::deep_copy(d_eatom,0.0);
}
if (vflag_atom) {
if (k_vatom.extent(0) < maxvatom) {
if ((int)k_vatom.extent(0) < maxvatom) {
memoryKK->destroy_kokkos(k_vatom,vatom);
memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"improper:vatom");
d_vatom = k_vatom.template view<KKDeviceType>();

View File

@ -76,7 +76,7 @@ void BondHybridKokkos::compute(int eflag, int vflag)
Kokkos::parallel_for(nbondlist_orig,LAMMPS_LAMBDA(int i) {
const int m = d_map[d_bondlist_orig(i,2)];
if (m >= 0) Kokkos::atomic_increment(&d_nbondlist[m]);
if (m >= 0) Kokkos::atomic_inc(&d_nbondlist[m]);
});
k_nbondlist.modify_device();
@ -87,7 +87,7 @@ void BondHybridKokkos::compute(int eflag, int vflag)
if (h_nbondlist[m] > maxbond_all)
maxbond_all = h_nbondlist[m] + EXTRA;
if (k_bondlist.d_view.extent(1) < maxbond_all)
if ((int)k_bondlist.d_view.extent(1) < maxbond_all)
MemKK::realloc_kokkos(k_bondlist, "bond_hybrid:bondlist", nstyles, maxbond_all, 3);
auto d_bondlist = k_bondlist.d_view;

View File

@ -37,6 +37,8 @@ static constexpr int BUFEXTRA = 1000;
CommTiledKokkos::CommTiledKokkos(LAMMPS *_lmp) : CommTiled(_lmp)
{
sendlist = nullptr;
maxsendlist = nullptr;
nprocmaxtot = 0;
}
/* ---------------------------------------------------------------------- */
@ -49,6 +51,8 @@ CommTiledKokkos::CommTiledKokkos(LAMMPS *_lmp) : CommTiled(_lmp)
CommTiledKokkos::CommTiledKokkos(LAMMPS *_lmp, Comm *oldcomm) : CommTiled(_lmp,oldcomm)
{
sendlist = nullptr;
maxsendlist = nullptr;
nprocmaxtot = 0;
}
/* ---------------------------------------------------------------------- */
@ -56,7 +60,9 @@ CommTiledKokkos::CommTiledKokkos(LAMMPS *_lmp, Comm *oldcomm) : CommTiled(_lmp,o
CommTiledKokkos::~CommTiledKokkos()
{
memoryKK->destroy_kokkos(k_sendlist,sendlist);
memory->destroy(maxsendlist);
sendlist = nullptr;
maxsendlist = nullptr;
buf_send = nullptr;
buf_recv = nullptr;
}
@ -657,12 +663,11 @@ void CommTiledKokkos::grow_list(int iswap, int iwhich, int n)
k_sendlist.sync<LMPHostType>();
k_sendlist.modify<LMPHostType>();
if (size > (int)k_sendlist.extent(2)) {
memoryKK->grow_kokkos(k_sendlist,sendlist,maxswap,maxsend,size,"comm:sendlist");
memoryKK->grow_kokkos(k_sendlist,sendlist,maxswap,nprocmaxtot,size,"comm:sendlist");
for (int i = 0; i < maxswap; i++)
maxsendlist[iswap][iwhich] = size;
}
for (int i = 0; i < maxswap; i++)
for (int j = 0; j < nprocmaxtot; j++)
maxsendlist[i][j] = size;
}
/* ----------------------------------------------------------------------
@ -692,24 +697,23 @@ void CommTiledKokkos::grow_swap_send(int i, int n, int /*nold*/)
memory->destroy(sendbox_multiold[i]);
memory->create(sendbox_multiold[i],n,atom->ntypes+1,6,"comm:sendbox_multiold");
delete [] maxsendlist[i];
maxsendlist[i] = new int[n];
for (int j = 0; j < n; j++)
maxsendlist[i][j] = BUFMIN;
if (sendlist && !k_sendlist.d_view.data()) {
for (int ii = 0; ii < maxswap; ii++) {
if (sendlist[ii]) {
for (int jj = 0; jj < nprocmax[ii]; jj++)
memory->destroy(sendlist[ii][jj]);
delete [] sendlist[ii];
}
}
if (sendlist && !k_sendlist.h_view.data()) {
delete [] sendlist;
delete [] maxsendlist;
sendlist = nullptr;
maxsendlist = nullptr;
} else {
memoryKK->destroy_kokkos(k_sendlist,sendlist);
memory->destroy(maxsendlist);
}
memoryKK->create_kokkos(k_sendlist,sendlist,maxswap,n,BUFMIN,"comm:sendlist");
nprocmaxtot = MAX(nprocmaxtot,n);
memoryKK->create_kokkos(k_sendlist,sendlist,maxswap,nprocmaxtot,BUFMIN,"comm:sendlist");
memory->create(maxsendlist,maxswap,nprocmaxtot,"comm:maxsendlist");
for (int i = 0; i < maxswap; i++)
for (int j = 0; j < nprocmaxtot; j++)
maxsendlist[i][j] = BUFMIN;
}

View File

@ -64,18 +64,17 @@ class CommTiledKokkos : public CommTiled {
template<class DeviceType> void reverse_comm_device();
protected:
int nprocmaxtot;
DAT::tdual_int_3d k_sendlist;
//DAT::tdual_int_scalar k_total_send;
DAT::tdual_xfloat_2d k_buf_send,k_buf_recv;
//DAT::tdual_int_scalar k_count;
void grow_send(int, int) override;
void grow_recv(int, int flag = 0) override;
void grow_send(int, int) override; // reallocate send buffer
void grow_recv(int, int flag = 0) override; // free/allocate recv buffer
void grow_send_kokkos(int, int, ExecutionSpace space = Host);
void grow_recv_kokkos(int, int, ExecutionSpace space = Host);
void grow_list(int, int, int) override;
void grow_swap_send(int, int, int) override; // grow swap arrays for send and recv
void grow_list(int, int, int) override; // reallocate sendlist for one swap/proc
void grow_swap_send(int, int, int) override; // grow swap arrays for send and recv
};
} // namespace LAMMPS_NS

View File

@ -75,14 +75,14 @@ void DihedralHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
// reallocate per-atom arrays if necessary
if (eflag_atom) {
if(k_eatom.extent(0) < maxeatom) {
if ((int)k_eatom.extent(0) < maxeatom) {
memoryKK->destroy_kokkos(k_eatom,eatom);
memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom");
d_eatom = k_eatom.view<DeviceType>();
} else Kokkos::deep_copy(d_eatom,0.0);
}
if (vflag_atom) {
if(k_vatom.extent(0) < maxvatom) {
if ((int)k_vatom.extent(0) < maxvatom) {
memoryKK->destroy_kokkos(k_vatom,vatom);
memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"dihedral:vatom");
d_vatom = k_vatom.view<DeviceType>();

View File

@ -76,7 +76,7 @@ void DihedralHybridKokkos::compute(int eflag, int vflag)
Kokkos::parallel_for(ndihedrallist_orig,LAMMPS_LAMBDA(int i) {
const int m = d_map[d_dihedrallist_orig(i,4)];
if (m >= 0) Kokkos::atomic_increment(&d_ndihedrallist[m]);
if (m >= 0) Kokkos::atomic_inc(&d_ndihedrallist[m]);
});
k_ndihedrallist.modify_device();
@ -87,7 +87,7 @@ void DihedralHybridKokkos::compute(int eflag, int vflag)
if (h_ndihedrallist[m] > maxdihedral_all)
maxdihedral_all = h_ndihedrallist[m] + EXTRA;
if (k_dihedrallist.d_view.extent(1) < maxdihedral_all)
if ((int)k_dihedrallist.d_view.extent(1) < maxdihedral_all)
MemKK::realloc_kokkos(k_dihedrallist, "dihedral_hybrid:dihedrallist", nstyles, maxdihedral_all, 5);
auto d_dihedrallist = k_dihedrallist.d_view;

View File

@ -690,7 +690,7 @@ int FixCMAPKokkos<DeviceType>::pack_exchange_kokkos(
copymode = 1;
Kokkos::parallel_scan(nsend, KOKKOS_LAMBDA(const int &mysend, int &offset, const bool &final) {
Kokkos::parallel_scan(Kokkos::RangePolicy<DeviceType>(0,nsend), KOKKOS_LAMBDA(const int &mysend, int &offset, const bool &final) {
const int i = d_exchange_sendlist(mysend);
@ -782,7 +782,7 @@ void FixCMAPKokkos<DeviceType>::unpack_exchange_kokkos(
copymode = 1;
Kokkos::parallel_for(nrecv, KOKKOS_LAMBDA(const int &i) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nrecv), KOKKOS_LAMBDA(const int &i) {
int index = d_indices(i);
if (index > -1) {
int m = d_ubuf(d_buf(i)).i;

View File

@ -39,7 +39,12 @@ enum { CONSTANT, EQUAL, ATOM };
template<class DeviceType>
FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **arg) :
FixLangevin(lmp, narg, arg),rand_pool(seed + comm->me)
FixLangevin(lmp, narg, arg),
#ifdef LMP_KOKKOS_DEBUG_RNG
rand_pool(seed + comm->me, lmp)
#else
rand_pool(seed + comm->me)
#endif
{
kokkosable = 1;
fuse_integrate_flag = 1;
@ -48,43 +53,42 @@ FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **a
int ntypes = atomKK->ntypes;
// allocate per-type arrays for force prefactors
delete[] gfactor1;
delete[] gfactor2;
delete[] ratio;
memoryKK->create_kokkos(k_gfactor1,gfactor1,ntypes+1,"langevin:gfactor1");
memoryKK->create_kokkos(k_gfactor2,gfactor2,ntypes+1,"langevin:gfactor2");
memoryKK->create_kokkos(k_ratio,ratio,ntypes+1,"langevin:ratio");
d_gfactor1 = k_gfactor1.template view<DeviceType>();
h_gfactor1 = k_gfactor1.template view<LMPHostType>();
h_gfactor1 = k_gfactor1.h_view;
d_gfactor2 = k_gfactor2.template view<DeviceType>();
h_gfactor2 = k_gfactor2.template view<LMPHostType>();
h_gfactor2 = k_gfactor2.h_view;
d_ratio = k_ratio.template view<DeviceType>();
h_ratio = k_ratio.template view<LMPHostType>();
h_ratio = k_ratio.h_view;
// optional args
for (int i = 1; i <= ntypes; i++) ratio[i] = 1.0;
k_ratio.template modify<LMPHostType>();
k_ratio.modify_host();
if (gjfflag) {
memory->destroy(franprev);
memory->destroy(lv);
grow_arrays(atomKK->nmax);
atom->add_callback(Atom::GROW);
// initialize franprev to zero
for (int i = 0; i < atomKK->nlocal; i++) {
franprev[i][0] = 0.0;
franprev[i][1] = 0.0;
franprev[i][2] = 0.0;
lv[i][0] = 0.0;
lv[i][1] = 0.0;
lv[i][2] = 0.0;
}
k_franprev.template modify<LMPHostType>();
k_lv.template modify<LMPHostType>();
Kokkos::deep_copy(d_franprev,0.0);
Kokkos::deep_copy(d_lv,0.0);
}
if (zeroflag) {
k_fsumall = tdual_double_1d_3n("langevin:fsumall");
h_fsumall = k_fsumall.template view<LMPHostType>();
h_fsumall = k_fsumall.h_view;
d_fsumall = k_fsumall.template view<DeviceType>();
}
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = V_MASK | F_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK;
datamask_read = V_MASK | F_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK;
datamask_modify = F_MASK;
}
@ -93,13 +97,21 @@ FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **a
template<class DeviceType>
FixLangevinKokkos<DeviceType>::~FixLangevinKokkos()
{
if (copymode) return;
memoryKK->destroy_kokkos(k_gfactor1,gfactor1);
memoryKK->destroy_kokkos(k_gfactor2,gfactor2);
memoryKK->destroy_kokkos(k_ratio,ratio);
memoryKK->destroy_kokkos(k_flangevin,flangevin);
if (gjfflag) memoryKK->destroy_kokkos(k_franprev,franprev);
if (gjfflag) memoryKK->destroy_kokkos(k_lv,lv);
if (gjfflag) {
memoryKK->destroy_kokkos(k_franprev,franprev);
memoryKK->destroy_kokkos(k_lv,lv);
}
memoryKK->destroy_kokkos(k_tforce,tforce);
#ifdef LMP_KOKKOS_DEBUG_RNG
rand_pool.destroy();
#endif
}
/* ---------------------------------------------------------------------- */
@ -118,8 +130,170 @@ void FixLangevinKokkos<DeviceType>::init()
error->warning(FLERR,"Fix langevin gjf + kokkos is not implemented with random gaussians");
// prefactors are modified in the init
k_gfactor1.template modify<LMPHostType>();
k_gfactor2.template modify<LMPHostType>();
k_gfactor1.modify_host();
k_gfactor2.modify_host();
#ifdef LMP_KOKKOS_DEBUG_RNG
rand_pool.init(random,seed + comm->me);
#endif
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixLangevinKokkos<DeviceType>::setup(int vflag)
{
if (gjfflag) {
double dt = update->dt;
double ftm2v = force->ftm2v;
auto v = atomKK->k_v.view<DeviceType>();
auto f = atomKK->k_f.view<DeviceType>();
auto mask = atomKK->k_mask.view<DeviceType>();
int nlocal = atom->nlocal;
auto rmass = atomKK->k_rmass.view<DeviceType>();
auto mass = atomKK->k_mass.view<DeviceType>();
auto type = atomKK->k_type.view<DeviceType>();
auto groupbit = this->groupbit;
auto gjfa = this->gjfa;
auto gjfsib = this->gjfsib;
if (atom->rmass) {
atomKK->sync(execution_space,V_MASK|F_MASK|MASK_MASK|RMASS_MASK);
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int &i) {
if (mask[i] & groupbit) {
const double dtfm = ftm2v * 0.5 * dt / rmass[i];
v(i,0) -= dtfm * f(i,0);
v(i,1) -= dtfm * f(i,1);
v(i,2) -= dtfm * f(i,2);
}
});
if (tbiasflag) {
// account for bias velocity
if (temperature->kokkosable) {
temperature->compute_scalar();
temperature->remove_bias_all_kk();
} else {
atomKK->sync(temperature->execution_space,temperature->datamask_read);
temperature->compute_scalar();
temperature->remove_bias_all();
atomKK->modified(temperature->execution_space,temperature->datamask_modify);
atomKK->sync(execution_space,temperature->datamask_modify);
}
}
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int &i) {
if (mask[i] & groupbit) {
v(i,0) /= gjfa * gjfsib * gjfsib;
v(i,1) /= gjfa * gjfsib * gjfsib;
v(i,2) /= gjfa * gjfsib * gjfsib;
}
});
if (tbiasflag) {
if (temperature->kokkosable) temperature->restore_bias_all();
else {
atomKK->sync(temperature->execution_space,temperature->datamask_read);
temperature->restore_bias_all();
atomKK->modified(temperature->execution_space,temperature->datamask_modify);
atomKK->sync(execution_space,temperature->datamask_modify);
}
}
} else {
atomKK->sync(execution_space,V_MASK|F_MASK|MASK_MASK|TYPE_MASK);
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int &i) {
if (mask[i] & groupbit) {
const double dtfm = ftm2v * 0.5 * dt / mass[type[i]];
v(i,0) -= dtfm * f(i,0);
v(i,1) -= dtfm * f(i,1);
v(i,2) -= dtfm * f(i,2);
}
});
if (tbiasflag) {
// account for bias velocity
if (temperature->kokkosable) {
temperature->compute_scalar();
temperature->remove_bias_all_kk();
} else {
atomKK->sync(temperature->execution_space,temperature->datamask_read);
temperature->compute_scalar();
temperature->remove_bias_all();
atomKK->modified(temperature->execution_space,temperature->datamask_modify);
atomKK->sync(execution_space,temperature->datamask_modify);
}
}
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int &i) {
if (mask[i] & groupbit) {
v(i,0) /= gjfa * gjfsib * gjfsib;
v(i,1) /= gjfa * gjfsib * gjfsib;
v(i,2) /= gjfa * gjfsib * gjfsib;
}
});
if (tbiasflag) {
if (temperature->kokkosable) temperature->restore_bias_all();
else {
atomKK->sync(temperature->execution_space,temperature->datamask_read);
temperature->restore_bias_all();
atomKK->modified(temperature->execution_space,temperature->datamask_modify);
atomKK->sync(execution_space,temperature->datamask_modify);
}
}
}
atomKK->modified(execution_space,V_MASK);
}
post_force(vflag);
if (gjfflag) {
double dt = update->dt;
double ftm2v = force->ftm2v;
auto f = atomKK->k_f.view<DeviceType>();
auto v = atomKK->k_v.view<DeviceType>();
auto mask = atomKK->k_mask.view<DeviceType>();
int nlocal = atom->nlocal;
auto rmass = atomKK->k_rmass.view<DeviceType>();
auto mass = atomKK->k_mass.view<DeviceType>();
auto type = atomKK->k_type.view<DeviceType>();
auto groupbit = this->groupbit;
k_lv.template sync<DeviceType>();
auto l_lv = d_lv;
if (atom->rmass) {
atomKK->sync(execution_space,V_MASK|F_MASK|MASK_MASK|RMASS_MASK);
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int &i) {
if (mask[i] & groupbit) {
const double dtfm = ftm2v * 0.5 * dt / rmass[i];
v(i,0) += dtfm * f(i,0);
v(i,1) += dtfm * f(i,1);
v(i,2) += dtfm * f(i,2);
l_lv(i,0) = v(i,0);
l_lv(i,1) = v(i,1);
l_lv(i,2) = v(i,2);
}
});
} else {
atomKK->sync(execution_space,V_MASK|F_MASK|MASK_MASK|TYPE_MASK);
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int &i) {
if (mask[i] & groupbit) {
const double dtfm = ftm2v * 0.5 * dt / mass[type[i]];
v(i,0) += dtfm * f(i,0);
v(i,1) += dtfm * f(i,1);
v(i,2) += dtfm * f(i,2);
l_lv(i,0) = v(i,0);
l_lv(i,1) = v(i,1);
l_lv(i,2) = v(i,2);
}
});
}
atomKK->modified(execution_space,V_MASK);
k_lv.template modify<DeviceType>();
}
}
/* ---------------------------------------------------------------------- */
@ -129,10 +303,10 @@ void FixLangevinKokkos<DeviceType>::grow_arrays(int nmax)
{
memoryKK->grow_kokkos(k_franprev,franprev,nmax,3,"langevin:franprev");
d_franprev = k_franprev.template view<DeviceType>();
h_franprev = k_franprev.template view<LMPHostType>();
h_franprev = k_franprev.h_view;
memoryKK->grow_kokkos(k_lv,lv,nmax,3,"langevin:lv");
d_lv = k_lv.template view<DeviceType>();
h_lv = k_lv.template view<LMPHostType>();
h_lv = k_lv.h_view;
}
/* ---------------------------------------------------------------------- */
@ -141,7 +315,6 @@ template<class DeviceType>
void FixLangevinKokkos<DeviceType>::initial_integrate(int /*vflag*/)
{
atomKK->sync(execution_space,datamask_read);
atomKK->modified(execution_space,datamask_modify);
v = atomKK->k_v.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
@ -150,6 +323,8 @@ void FixLangevinKokkos<DeviceType>::initial_integrate(int /*vflag*/)
FixLangevinKokkosInitialIntegrateFunctor<DeviceType> functor(this);
Kokkos::parallel_for(nlocal,functor);
atomKK->modified(execution_space,datamask_modify);
}
template<class DeviceType>
@ -184,6 +359,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int /*vflag*/)
rmass = atomKK->k_rmass.view<DeviceType>();
f = atomKK->k_f.template view<DeviceType>();
v = atomKK->k_v.template view<DeviceType>();
mass = atomKK->k_mass.template view<DeviceType>();
type = atomKK->k_type.template view<DeviceType>();
mask = atomKK->k_mask.template view<DeviceType>();
@ -197,7 +373,8 @@ void FixLangevinKokkos<DeviceType>::post_force(int /*vflag*/)
dt = update->dt;
mvv2e = force->mvv2e;
ftm2v = force->ftm2v;
fran_prop_const = sqrt(24.0*boltz/t_period/dt/mvv2e);
fran_prop_const = sqrt(2.0*boltz/t_period/dt/mvv2e);
fran_prop_const_gjf = sqrt(24.0*boltz/t_period/dt/mvv2e);
compute_target(); // modifies tforce vector, hence sync here
k_tforce.template sync<DeviceType>();
@ -220,7 +397,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int /*vflag*/)
maxatom1 = atomKK->nmax;
memoryKK->create_kokkos(k_flangevin,flangevin,maxatom1,3,"langevin:flangevin");
d_flangevin = k_flangevin.template view<DeviceType>();
h_flangevin = k_flangevin.template view<LMPHostType>();
h_flangevin = k_flangevin.h_view;
}
}
@ -550,7 +727,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int /*vflag*/)
h_fsumall(0) = fsumall[0]/count;
h_fsumall(1) = fsumall[1]/count;
h_fsumall(2) = fsumall[2]/count;
k_fsumall.template modify<LMPHostType>();
k_fsumall.modify_host();
k_fsumall.template sync<DeviceType>();
// set total force zero in parallel on the device
FixLangevinKokkosZeroForceFunctor<DeviceType> zero_functor(this);
@ -581,20 +758,30 @@ FSUM FixLangevinKokkos<DeviceType>::post_force_item(int i) const
if (mask[i] & groupbit) {
rand_type rand_gen = rand_pool.get_state();
if (Tp_TSTYLEATOM) tsqrt_t = sqrt(d_tforce[i]);
if (Tp_RMASS) {
gamma1 = -rmass[i] / t_period / ftm2v;
gamma2 = sqrt(rmass[i]) * fran_prop_const / ftm2v;
gamma1 *= 1.0/d_ratio[type[i]];
if (Tp_GJF)
gamma2 = sqrt(rmass[i]) * fran_prop_const_gjf / ftm2v;
else
gamma2 = sqrt(rmass[i]) * fran_prop_const / ftm2v;
gamma1 *= 1.0/ratio[type[i]];
gamma2 *= 1.0/sqrt(d_ratio[type[i]]) * tsqrt_t;
} else {
gamma1 = d_gfactor1[type[i]];
gamma2 = d_gfactor2[type[i]] * tsqrt_t;
}
fran[0] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
fran[1] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
fran[2] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
if (Tp_GJF) {
fran[0] = gamma2 * rand_gen.normal(); //random->gaussian()
fran[1] = gamma2 * rand_gen.normal(); //random->gaussian()
fran[2] = gamma2 * rand_gen.normal(); //random->gaussian()
} else {
fran[0] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
fran[1] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
fran[2] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
}
if (Tp_BIAS) {
fdrag[0] = gamma1*v(i,0);
@ -678,7 +865,6 @@ void FixLangevinKokkos<DeviceType>::zero_force_item(int i) const
f(i,1) -= d_fsumall[1];
f(i,2) -= d_fsumall[2];
}
}
/* ----------------------------------------------------------------------
@ -740,7 +926,7 @@ void FixLangevinKokkos<DeviceType>::reset_dt()
force->ftm2v;
h_gfactor2[i] *= 1.0/sqrt(h_ratio[i]);
}
k_gfactor2.template modify<LMPHostType>();
k_gfactor2.modify_host();
}
}
@ -781,9 +967,15 @@ KOKKOS_INLINE_FUNCTION
double FixLangevinKokkos<DeviceType>::compute_energy_item(int i) const
{
double my_energy = 0.0;
if (mask[i] & groupbit)
my_energy = d_flangevin(i,0)*v(i,0) + d_flangevin(i,1)*v(i,1) +
d_flangevin(i,2)*v(i,2);
if (mask[i] & groupbit) {
if (gjfflag) {
my_energy = d_flangevin(i,0)*d_lv(i,0) + d_flangevin(i,1)*d_lv(i,1) +
d_flangevin(i,2)*d_lv(i,2);
} else {
my_energy = d_flangevin(i,0)*v(i,0) + d_flangevin(i,1)*v(i,1) +
d_flangevin(i,2)*v(i,2);
}
}
return my_energy;
}
@ -796,30 +988,42 @@ void FixLangevinKokkos<DeviceType>::end_of_step()
{
if (!tallyflag && !gjfflag) return;
dt = update->dt;
ftm2v = force->ftm2v;
v = atomKK->k_v.template view<DeviceType>();
f = atomKK->k_f.template view<DeviceType>();
rmass = atomKK->k_rmass.template view<DeviceType>();
mass = atomKK->k_mass.template view<DeviceType>();
mask = atomKK->k_mask.template view<DeviceType>();
atomKK->sync(execution_space,V_MASK | MASK_MASK);
int nlocal = atomKK->nlocal;
energy_onestep = 0.0;
atomKK->sync(execution_space,V_MASK | MASK_MASK);
if (gjfflag) k_lv.template sync<DeviceType>();
k_flangevin.template sync<DeviceType>();
FixLangevinKokkosTallyEnergyFunctor<DeviceType> tally_functor(this);
Kokkos::parallel_reduce(nlocal,tally_functor,energy_onestep);
if (tallyflag) {
FixLangevinKokkosTallyEnergyFunctor<DeviceType> tally_functor(this);
Kokkos::parallel_reduce(nlocal,tally_functor,energy_onestep);
}
if (gjfflag) {
if (rmass.data()) {
atomKK->sync(execution_space,RMASS_MASK);
FixLangevinKokkosEndOfStepFunctor<DeviceType,1> functor(this);
Kokkos::parallel_for(nlocal,functor);
} else {
atomKK->sync(execution_space,TYPE_MASK);
type = atomKK->k_type.template view<DeviceType>();
mass = atomKK->k_mass.view<DeviceType>();
FixLangevinKokkosEndOfStepFunctor<DeviceType,0> functor(this);
Kokkos::parallel_for(nlocal,functor);
}
}
atomKK->modified(execution_space,V_MASK);
k_lv.template modify<DeviceType>();
energy += energy_onestep*update->dt;
}
@ -828,7 +1032,7 @@ KOKKOS_INLINE_FUNCTION
void FixLangevinKokkos<DeviceType>::end_of_step_item(int i) const {
double tmp[3];
if (mask[i] & groupbit) {
const double dtfm = force->ftm2v * 0.5 * dt / mass[type[i]];
const double dtfm = ftm2v * 0.5 * dt / mass[type[i]];
tmp[0] = v(i,0);
tmp[1] = v(i,1);
tmp[2] = v(i,2);
@ -841,10 +1045,10 @@ void FixLangevinKokkos<DeviceType>::end_of_step_item(int i) const {
dtfm * 0.5 * (gjfsib * d_flangevin(i,0) - d_franprev(i,0)) +
(gjfsib * gjfa * 0.5 + dt * 0.25 / t_period / gjfsib) * d_lv(i,0);
v(i,1) = 0.5 * gjfsib * gjfsib * (v(i,1) + dtfm * f(i,1) / gjfa) +
dtfm * 0.5 * (gjfsib * d_flangevin(i,0) - d_franprev(i,1)) +
dtfm * 0.5 * (gjfsib * d_flangevin(i,1) - d_franprev(i,1)) +
(gjfsib * gjfa * 0.5 + dt * 0.25 / t_period / gjfsib) * d_lv(i,1);
v(i,2) = 0.5 * gjfsib * gjfsib * (v(i,2) + dtfm * f(i,2) / gjfa) +
dtfm * 0.5 * (gjfsib * d_flangevin(i,0) - d_franprev(i,2)) +
dtfm * 0.5 * (gjfsib * d_flangevin(i,2) - d_franprev(i,2)) +
(gjfsib * gjfa * 0.5 + dt * 0.25 / t_period / gjfsib) * d_lv(i,2);
}
d_lv(i,0) = tmp[0];
@ -859,7 +1063,7 @@ void FixLangevinKokkos<DeviceType>::end_of_step_rmass_item(int i) const
{
double tmp[3];
if (mask[i] & groupbit) {
const double dtfm = force->ftm2v * 0.5 * dt / rmass[i];
const double dtfm = ftm2v * 0.5 * dt / rmass[i];
tmp[0] = v(i,0);
tmp[1] = v(i,1);
tmp[2] = v(i,2);
@ -891,6 +1095,9 @@ void FixLangevinKokkos<DeviceType>::end_of_step_rmass_item(int i) const
template<class DeviceType>
void FixLangevinKokkos<DeviceType>::copy_arrays(int i, int j, int /*delflag*/)
{
k_franprev.sync_host();
k_lv.sync_host();
h_franprev(j,0) = h_franprev(i,0);
h_franprev(j,1) = h_franprev(i,1);
h_franprev(j,2) = h_franprev(i,2);
@ -898,8 +1105,8 @@ void FixLangevinKokkos<DeviceType>::copy_arrays(int i, int j, int /*delflag*/)
h_lv(j,1) = h_lv(i,1);
h_lv(j,2) = h_lv(i,2);
k_franprev.template modify<LMPHostType>();
k_lv.template modify<LMPHostType>();
k_franprev.modify_host();
k_lv.modify_host();
}
@ -924,24 +1131,6 @@ void FixLangevinKokkos<DeviceType>::sort_kokkos(Kokkos::BinSort<KeyViewType, Bin
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixLangevinKokkos<DeviceType>::cleanup_copy()
{
random = nullptr;
tstr = nullptr;
gfactor1 = nullptr;
gfactor2 = nullptr;
ratio = nullptr;
id_temp = nullptr;
flangevin = nullptr;
tforce = nullptr;
gjfflag = 0;
franprev = nullptr;
lv = nullptr;
id = style = nullptr;
vatom = nullptr;
}
namespace LAMMPS_NS {
template class FixLangevinKokkos<LMPDeviceType>;
#ifdef LMP_KOKKOS_GPU

View File

@ -27,6 +27,7 @@ FixStyle(langevin/kk/host,FixLangevinKokkos<LMPHostType>);
#include "kokkos_type.h"
#include "kokkos_base.h"
#include "Kokkos_Random.hpp"
#include "rand_pool_wrap_kokkos.h"
namespace LAMMPS_NS {
@ -66,8 +67,8 @@ namespace LAMMPS_NS {
FixLangevinKokkos(class LAMMPS *, int, char **);
~FixLangevinKokkos() override;
void cleanup_copy();
void init() override;
void setup(int) override;
void initial_integrate(int) override;
void fused_integrate(int) override;
void post_force(int) override;
@ -135,13 +136,21 @@ namespace LAMMPS_NS {
typename tdual_double_1d_3n::t_dev d_fsumall;
typename tdual_double_1d_3n::t_host h_fsumall;
double boltz,dt,mvv2e,ftm2v,fran_prop_const;
double boltz,dt,mvv2e,ftm2v,fran_prop_const,fran_prop_const_gjf;
void compute_target();
#ifndef LMP_KOKKOS_DEBUG_RNG
Kokkos::Random_XorShift64_Pool<DeviceType> rand_pool;
typedef typename Kokkos::Random_XorShift64_Pool<DeviceType>::generator_type rand_type;
//Kokkos::Random_XorShift1024_Pool<DeviceType> rand_pool;
//typedef typename Kokkos::Random_XorShift1024_Pool<DeviceType>::generator_type rand_type;
#else
RandPoolWrap rand_pool;
typedef RandWrap rand_type;
#endif
};
template <class DeviceType>
@ -150,7 +159,7 @@ namespace LAMMPS_NS {
FixLangevinKokkos<DeviceType> c;
FixLangevinKokkosInitialIntegrateFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
c(*c_ptr) {c.cleanup_copy();};
c(*c_ptr) {c.set_copymode(1);};
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
@ -168,7 +177,7 @@ namespace LAMMPS_NS {
FixLangevinKokkosPostForceFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
c(*c_ptr) {}
~FixLangevinKokkosPostForceFunctor() {c.cleanup_copy();}
~FixLangevinKokkosPostForceFunctor() {c.set_copymode(1);}
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
@ -204,7 +213,7 @@ namespace LAMMPS_NS {
FixLangevinKokkos<DeviceType> c;
FixLangevinKokkosZeroForceFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
c(*c_ptr) {c.cleanup_copy();}
c(*c_ptr) {c.set_copymode(1);}
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
@ -218,7 +227,7 @@ namespace LAMMPS_NS {
FixLangevinKokkos<DeviceType> c;
typedef double value_type;
FixLangevinKokkosTallyEnergyFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
c(*c_ptr) {c.cleanup_copy();}
c(*c_ptr) {c.set_copymode(1);}
KOKKOS_INLINE_FUNCTION
void operator()(const int i, value_type &energy) const {
@ -241,7 +250,7 @@ namespace LAMMPS_NS {
FixLangevinKokkos<DeviceType> c;
FixLangevinKokkosEndOfStepFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
c(*c_ptr) {c.cleanup_copy();}
c(*c_ptr) {c.set_copymode(1);}
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {

View File

@ -36,7 +36,7 @@ FixMomentumKokkos<DeviceType>::FixMomentumKokkos(LAMMPS *lmp, int narg, char **a
{
kokkosable = 1;
atomKK = (AtomKokkos *) atom;
groupKK = (GroupKokkos<DeviceType> *)group;
groupKK = (GroupKokkos *)group;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = EMPTY_MASK;
datamask_modify = EMPTY_MASK;
@ -94,7 +94,7 @@ void FixMomentumKokkos<DeviceType>::end_of_step()
double ekin_old,ekin_new;
ekin_old = ekin_new = 0.0;
if (dynamic) masstotal = groupKK->mass(igroup);
if (dynamic) masstotal = groupKK->mass_kk<DeviceType>(igroup);
// do nothing if group is empty, i.e. mass is zero;
@ -109,7 +109,7 @@ void FixMomentumKokkos<DeviceType>::end_of_step()
auto groupbit2 = groupbit;
if (linear) {
double vcm[3];
groupKK->vcm(igroup,masstotal,vcm);
groupKK->vcm_kk<DeviceType>(igroup,masstotal,vcm);
// adjust velocities by vcm to zero linear momentum
// only adjust a component if flag is set
@ -131,9 +131,9 @@ void FixMomentumKokkos<DeviceType>::end_of_step()
if (angular) {
double xcm[3],angmom[3],omega[3],inertia[3][3];
groupKK->xcm(igroup,masstotal,xcm);
groupKK->angmom(igroup,xcm,angmom);
groupKK->inertia(igroup,xcm,inertia);
groupKK->xcm_kk<DeviceType>(igroup,masstotal,xcm);
groupKK->angmom_kk<DeviceType>(igroup,xcm,angmom);
groupKK->inertia_kk<DeviceType>(igroup,xcm,inertia);
group->omega(angmom,inertia,omega);
// adjust velocities to zero omega

View File

@ -38,7 +38,7 @@ class FixMomentumKokkos : public FixMomentum {
FixMomentumKokkos(class LAMMPS *, int, char **);
void end_of_step() override;
private:
GroupKokkos<DeviceType> *groupKK;
GroupKokkos *groupKK;
};
}

View File

@ -66,7 +66,7 @@ void FixNVELimitKokkos<DeviceType>::initial_integrate(int /*vflag*/)
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space, X_MASK|V_MASK|F_MASK|MASK_MASK|RMASS_MASK );
Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) {
if (d_mask[i] & l_groupbit) {
const double dtfm = l_dtf / d_rmass[i];
d_v(i,0) += dtfm * d_f(i,0);
@ -95,7 +95,7 @@ void FixNVELimitKokkos<DeviceType>::initial_integrate(int /*vflag*/)
auto l_groupbit = groupbit;
atomKK->sync(execution_space, X_MASK|V_MASK|F_MASK|MASK_MASK|TYPE_MASK );
Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) {
if (d_mask[i] & l_groupbit) {
const double dtfm = l_dtf / d_mass[d_type[i]];
d_v(i,0) += dtfm * d_f(i,0);
@ -144,7 +144,7 @@ void FixNVELimitKokkos<DeviceType>::final_integrate()
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
atomKK->sync(execution_space, V_MASK|F_MASK|MASK_MASK|RMASS_MASK );
Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) {
if (d_mask[i] & l_groupbit) {
const double dtfm = l_dtf / d_rmass[i];
d_v(i,0) += dtfm * d_f(i,0);
@ -168,7 +168,7 @@ void FixNVELimitKokkos<DeviceType>::final_integrate()
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space, V_MASK|F_MASK|MASK_MASK|TYPE_MASK );
Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) {
if (d_mask[i] & l_groupbit) {
const double dtfm = l_dtf / d_mass[d_type[i]];
d_v(i,0) += dtfm * d_f(i,0);

View File

@ -38,7 +38,7 @@ FixRecenterKokkos<DeviceType>::FixRecenterKokkos(LAMMPS *lmp, int narg, char **a
{
kokkosable = 1;
atomKK = (AtomKokkos *)atom;
groupKK = (GroupKokkos<DeviceType> *)group;
groupKK = (GroupKokkos *)group;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = X_MASK | MASK_MASK;
@ -87,9 +87,10 @@ void FixRecenterKokkos<DeviceType>::initial_integrate(int /*vflag*/)
// current COM
if (group->dynamic[igroup]) masstotal = groupKK->mass(igroup);
if (group->dynamic[igroup]) masstotal = groupKK->mass_kk<DeviceType>(igroup);
double xcm[3];
groupKK->xcm(igroup,masstotal,xcm);
groupKK->xcm_kk<DeviceType>(igroup,masstotal,xcm);
// shift coords by difference between actual COM and requested COM

View File

@ -36,7 +36,7 @@ class FixRecenterKokkos : public FixRecenter {
FixRecenterKokkos(class LAMMPS *, int, char **);
void initial_integrate(int) override;
private:
GroupKokkos<DeviceType> *groupKK;
GroupKokkos *groupKK;
};
} // namespace LAMMPS_NS

View File

@ -1859,7 +1859,7 @@ void FixRxKokkos<DeviceType>::computeLocalTemperature()
// loop over neighbors of my atoms
#if 0
Kokkos::parallel_for ( inum,
Kokkos::parallel_for ( Kokkos::RangePolicy<DeviceType>(0,inum),
LAMMPS_LAMBDA(const int ii)
{
// Create an atomic view of sumWeights and dpdThetaLocal. Only needed
@ -1939,7 +1939,7 @@ void FixRxKokkos<DeviceType>::computeLocalTemperature()
// self-interaction for local temperature
#if 0
Kokkos::parallel_for ( nlocal,
Kokkos::parallel_for ( Kokkos::RangePolicy<DeviceType>(0,nlocal),
LAMMPS_LAMBDA(const int i)
{
double wij = 0.0;

View File

@ -283,22 +283,22 @@ void FixShardlowKokkos<DeviceType>::ssa_update_dpd(
const X_FLOAT delz = ztmp - x(j, 2);
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
#ifdef DEBUG_SSA_PAIR_CT
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(0, 0)));
else Kokkos::atomic_increment(&(d_counters(0, 1)));
Kokkos::atomic_increment(&(d_counters(0, 2)));
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_inc(&(d_counters(0, 0)));
else Kokkos::atomic_inc(&(d_counters(0, 1)));
Kokkos::atomic_inc(&(d_counters(0, 2)));
int rsqi = rsq / 8;
if (rsqi < 0) rsqi = 0;
else if (rsqi > 31) rsqi = 31;
Kokkos::atomic_increment(&(d_hist(rsqi)));
Kokkos::atomic_inc(&(d_hist(rsqi)));
#endif
// NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test
if ((rsq < (STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype)))
&& (rsq >= EPSILON_SQUARED)) {
#ifdef DEBUG_SSA_PAIR_CT
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(1, 0)));
else Kokkos::atomic_increment(&(d_counters(1, 1)));
Kokkos::atomic_increment(&(d_counters(1, 2)));
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_inc(&(d_counters(1, 0)));
else Kokkos::atomic_inc(&(d_counters(1, 1)));
Kokkos::atomic_inc(&(d_counters(1, 2)));
#endif
double r = sqrt(rsq);
double rinv = 1.0/r;
@ -428,22 +428,22 @@ void FixShardlowKokkos<DeviceType>::ssa_update_dpde(
const X_FLOAT delz = ztmp - x(j, 2);
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
#ifdef DEBUG_SSA_PAIR_CT
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(0, 0)));
else Kokkos::atomic_increment(&(d_counters(0, 1)));
Kokkos::atomic_increment(&(d_counters(0, 2)));
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_inc(&(d_counters(0, 0)));
else Kokkos::atomic_inc(&(d_counters(0, 1)));
Kokkos::atomic_inc(&(d_counters(0, 2)));
int rsqi = rsq / 8;
if (rsqi < 0) rsqi = 0;
else if (rsqi > 31) rsqi = 31;
Kokkos::atomic_increment(&(d_hist(rsqi)));
Kokkos::atomic_inc(&(d_hist(rsqi)));
#endif
// NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test
if ((rsq < (STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype)))
&& (rsq >= EPSILON_SQUARED)) {
#ifdef DEBUG_SSA_PAIR_CT
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(1, 0)));
else Kokkos::atomic_increment(&(d_counters(1, 1)));
Kokkos::atomic_increment(&(d_counters(1, 2)));
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_inc(&(d_counters(1, 0)));
else Kokkos::atomic_inc(&(d_counters(1, 1)));
Kokkos::atomic_inc(&(d_counters(1, 2)));
#endif
double r = sqrt(rsq);

View File

@ -123,7 +123,7 @@ void FixSpringSelfKokkos<DeviceType>::post_force(int /*vflag*/)
auto l_yflag = yflag;
auto l_zflag = zflag;
Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double& espring_kk) {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), LAMMPS_LAMBDA(const int& i, double& espring_kk) {
if (l_mask[i] & l_groupbit) {
Few<double,3> x_i;
x_i[0] = l_x(i,0);

View File

@ -1,363 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Mitch Murphy (alphataubio at gmail)
------------------------------------------------------------------------- */
#include "group_kokkos.h"
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "domain_kokkos.h"
#include "kokkos_few.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
template<class DeviceType>
GroupKokkos<DeviceType>::GroupKokkos(LAMMPS *lmp) : Group(lmp)
{
atomKK = (AtomKokkos *)atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
}
// ----------------------------------------------------------------------
// computations on a group of atoms
// ----------------------------------------------------------------------
/* ----------------------------------------------------------------------
compute the total mass of group of atoms
use either per-type mass or per-atom rmass
------------------------------------------------------------------------- */
template<class DeviceType>
double GroupKokkos<DeviceType>::mass(int igroup)
{
int groupbit = bitmask[igroup];
auto d_mask = atomKK->k_mask.template view<DeviceType>();
double one = 0.0;
if (atomKK->rmass) {
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
atomKK->sync(execution_space,MASK_MASK|RMASS_MASK);
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_one) {
if (d_mask(i) & groupbit) l_one += d_rmass(i);
}, one);
} else {
auto d_mass = atomKK->k_mass.template view<DeviceType>();
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space,MASK_MASK|TYPE_MASK);
atomKK->k_mass.template sync<DeviceType>();
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_one) {
if (d_mask(i) & groupbit) l_one += d_mass(d_type(i));
}, one);
}
double all;
MPI_Allreduce(&one, &all, 1, MPI_DOUBLE, MPI_SUM, world);
return all;
}
/* ----------------------------------------------------------------------
compute the center-of-mass coords of group of atoms
masstotal = total mass
return center-of-mass coords in cm[]
must unwrap atoms to compute center-of-mass correctly
------------------------------------------------------------------------- */
template<class DeviceType>
void GroupKokkos<DeviceType>::xcm(int igroup, double masstotal, double *xcm)
{
int groupbit = bitmask[igroup];
auto d_x = atomKK->k_x.template view<DeviceType>();
auto d_mask = atomKK->k_mask.template view<DeviceType>();
auto d_image = atomKK->k_image.template view<DeviceType>();
auto l_prd = Few<double, 3>(domain->prd);
auto l_h = Few<double, 6>(domain->h);
auto l_triclinic = domain->triclinic;
double cmone[3] = {0.0, 0.0, 0.0};
if (atomKK->rmass) {
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) {
if (d_mask(i) & groupbit) {
double massone = d_rmass(i);
Few<double,3> x_i;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
l_cmx += unwrapKK[0] * massone;
l_cmy += unwrapKK[1] * massone;
l_cmz += unwrapKK[2] * massone;
}
}, cmone[0], cmone[1], cmone[2]);
} else {
auto d_mass = atomKK->k_mass.template view<DeviceType>();
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
atomKK->k_mass.template sync<DeviceType>();
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) {
if (d_mask(i) & groupbit) {
double massone = d_mass(d_type(i));
Few<double,3> x_i;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
l_cmx += unwrapKK[0] * massone;
l_cmy += unwrapKK[1] * massone;
l_cmz += unwrapKK[2] * massone;
}
}, cmone[0], cmone[1], cmone[2]);
}
MPI_Allreduce(cmone, xcm, 3, MPI_DOUBLE, MPI_SUM, world);
if (masstotal > 0.0) {
xcm[0] /= masstotal;
xcm[1] /= masstotal;
xcm[2] /= masstotal;
}
}
/* ----------------------------------------------------------------------
compute the center-of-mass velocity of group of atoms
masstotal = total mass
return center-of-mass velocity in vcm[]
------------------------------------------------------------------------- */
template<class DeviceType>
void GroupKokkos<DeviceType>::vcm(int igroup, double masstotal, double *vcm)
{
int groupbit = bitmask[igroup];
auto d_v = atomKK->k_v.template view<DeviceType>();
auto d_mask = atomKK->k_mask.template view<DeviceType>();
auto d_image = atomKK->k_image.template view<DeviceType>();
double p[3] = {0.0, 0.0, 0.0};
if (atomKK->rmass) {
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
if (d_mask(i) & groupbit) {
double massone = d_rmass(i);
l_px += d_v(i,0) * massone;
l_py += d_v(i,1) * massone;
l_pz += d_v(i,2) * massone;
}
}, p[0], p[1], p[2]);
} else {
auto d_mass = atomKK->k_mass.template view<DeviceType>();
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
atomKK->k_mass.template sync<DeviceType>();
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
if (d_mask(i) & groupbit) {
double massone = d_mass(d_type(i));
l_px += d_v(i,0) * massone;
l_py += d_v(i,1) * massone;
l_pz += d_v(i,2) * massone;
}
}, p[0], p[1], p[2]);
}
MPI_Allreduce(p, vcm, 3, MPI_DOUBLE, MPI_SUM, world);
if (masstotal > 0.0) {
vcm[0] /= masstotal;
vcm[1] /= masstotal;
vcm[2] /= masstotal;
}
}
/* ----------------------------------------------------------------------
compute the angular momentum L (lmom) of group
around center-of-mass cm
must unwrap atoms to compute L correctly
------------------------------------------------------------------------- */
template<class DeviceType>
void GroupKokkos<DeviceType>::angmom(int igroup, double *xcm, double *lmom)
{
int groupbit = bitmask[igroup];
auto d_x = atomKK->k_x.template view<DeviceType>();
auto d_v = atomKK->k_v.template view<DeviceType>();
auto d_mask = atomKK->k_mask.template view<DeviceType>();
auto d_image = atomKK->k_image.template view<DeviceType>();
auto l_prd = Few<double, 3>(domain->prd);
auto l_h = Few<double, 6>(domain->h);
auto l_triclinic = domain->triclinic;
auto l_xcm0 = xcm[0];
auto l_xcm1 = xcm[1];
auto l_xcm2 = xcm[2];
double p[3] = {0.0, 0.0, 0.0};
if (atomKK->rmass) {
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
if (d_mask(i) & groupbit) {
double massone = d_rmass(i);
Few<double,3> x_i;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
double dx = unwrapKK[0] - l_xcm0;
double dy = unwrapKK[1] - l_xcm1;
double dz = unwrapKK[2] - l_xcm2;
l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1));
l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2));
l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0));
}
}, p[0], p[1], p[2]);
} else {
auto d_mass = atomKK->k_mass.template view<DeviceType>();
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
atomKK->k_mass.template sync<DeviceType>();
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
if (d_mask(i) & groupbit) {
double massone = d_mass(d_type(i));
Few<double,3> x_i;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
double dx = unwrapKK[0] - l_xcm0;
double dy = unwrapKK[1] - l_xcm1;
double dz = unwrapKK[2] - l_xcm2;
l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1));
l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2));
l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0));
}
}, p[0], p[1], p[2]);
}
MPI_Allreduce(p, lmom, 3, MPI_DOUBLE, MPI_SUM, world);
}
/* ----------------------------------------------------------------------
compute moment of inertia tensor around center-of-mass xcm of group
must unwrap atoms to compute itensor correctly
------------------------------------------------------------------------- */
template<class DeviceType>
void GroupKokkos<DeviceType>::inertia(int igroup, double *xcm, double itensor[3][3])
{
int groupbit = bitmask[igroup];
auto d_x = atomKK->k_x.template view<DeviceType>();
auto d_mask = atomKK->k_mask.template view<DeviceType>();
auto d_image = atomKK->k_image.template view<DeviceType>();
auto l_prd = Few<double, 3>(domain->prd);
auto l_h = Few<double, 6>(domain->h);
auto l_triclinic = domain->triclinic;
auto l_xcm0 = xcm[0];
auto l_xcm1 = xcm[1];
auto l_xcm2 = xcm[2];
double ione[3][3];
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++) ione[i][j] = 0.0;
if (atomKK->rmass) {
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) {
if (d_mask(i) & groupbit) {
double massone = d_rmass(i);
Few<double,3> x_i;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
double dx = unwrapKK[0] - l_xcm0;
double dy = unwrapKK[1] - l_xcm1;
double dz = unwrapKK[2] - l_xcm2;
l_i00 += massone * (dy * dy + dz * dz);
l_i11 += massone * (dx * dx + dz * dz);
l_i22 += massone * (dx * dx + dy * dy);
l_i01 -= massone * dx * dy;
l_i12 -= massone * dy * dz;
l_i02 -= massone * dx * dz;
}
}, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]);
} else {
auto d_mass = atomKK->k_mass.template view<DeviceType>();
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
atomKK->k_mass.template sync<DeviceType>();
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) {
if (d_mask(i) & groupbit) {
double massone = d_mass(d_type(i));
Few<double,3> x_i;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
double dx = unwrapKK[0] - l_xcm0;
double dy = unwrapKK[1] - l_xcm1;
double dz = unwrapKK[2] - l_xcm2;
l_i00 += massone * (dy * dy + dz * dz);
l_i11 += massone * (dx * dx + dz * dz);
l_i22 += massone * (dx * dx + dy * dy);
l_i01 -= massone * dx * dy;
l_i12 -= massone * dy * dz;
l_i02 -= massone * dx * dz;
}
}, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]);
}
ione[1][0] = ione[0][1];
ione[2][1] = ione[1][2];
ione[2][0] = ione[0][2];
MPI_Allreduce(&ione[0][0], &itensor[0][0], 9, MPI_DOUBLE, MPI_SUM, world);
}
namespace LAMMPS_NS {
template class GroupKokkos<LMPDeviceType>;
#ifdef LMP_KOKKOS_GPU
template class GroupKokkos<LMPHostType>;
#endif
}

View File

@ -15,22 +15,352 @@
#define LMP_GROUP_KOKKOS_H
#include "group.h"
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "domain_kokkos.h"
#include "kokkos_few.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
template<class DeviceType>
class GroupKokkos : public Group {
public:
GroupKokkos(class LAMMPS *);
double mass(int); // total mass of atoms in group
void xcm(int, double, double *); // center-of-mass coords of group
void vcm(int, double, double *); // center-of-mass velocity of group
void angmom(int, double *, double *); // angular momentum of group
void inertia(int, double *, double[3][3]); // inertia tensor
GroupKokkos(LAMMPS *lmp) : Group(lmp) { atomKK = (AtomKokkos *)atom; }
// ----------------------------------------------------------------------
// computations on a group of atoms
// ----------------------------------------------------------------------
/* ----------------------------------------------------------------------
compute the total mass of group of atoms
use either per-type mass or per-atom rmass
------------------------------------------------------------------------- */
template<class DeviceType>
double mass_kk(int igroup)
{
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
int groupbit = bitmask[igroup];
auto d_mask = atomKK->k_mask.template view<DeviceType>();
double one = 0.0;
if (atomKK->rmass) {
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
atomKK->sync(execution_space,MASK_MASK|RMASS_MASK);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_one) {
if (d_mask(i) & groupbit) l_one += d_rmass(i);
}, one);
} else {
auto d_mass = atomKK->k_mass.template view<DeviceType>();
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space,MASK_MASK|TYPE_MASK);
atomKK->k_mass.template sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_one) {
if (d_mask(i) & groupbit) l_one += d_mass(d_type(i));
}, one);
}
double all;
MPI_Allreduce(&one, &all, 1, MPI_DOUBLE, MPI_SUM, world);
return all;
}
/* ----------------------------------------------------------------------
compute the center-of-mass coords of group of atoms
masstotal = total mass
return center-of-mass coords in cm[]
must unwrap atoms to compute center-of-mass correctly
------------------------------------------------------------------------- */
template<class DeviceType>
void xcm_kk(int igroup, double masstotal, double *xcm)
{
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
int groupbit = bitmask[igroup];
auto d_x = atomKK->k_x.template view<DeviceType>();
auto d_mask = atomKK->k_mask.template view<DeviceType>();
auto d_image = atomKK->k_image.template view<DeviceType>();
auto l_prd = Few<double, 3>(domain->prd);
auto l_h = Few<double, 6>(domain->h);
auto l_triclinic = domain->triclinic;
double cmone[3] = {0.0, 0.0, 0.0};
if (atomKK->rmass) {
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) {
if (d_mask(i) & groupbit) {
double massone = d_rmass(i);
Few<double,3> x_i;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
l_cmx += unwrapKK[0] * massone;
l_cmy += unwrapKK[1] * massone;
l_cmz += unwrapKK[2] * massone;
}
}, cmone[0], cmone[1], cmone[2]);
} else {
auto d_mass = atomKK->k_mass.template view<DeviceType>();
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
atomKK->k_mass.template sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) {
if (d_mask(i) & groupbit) {
double massone = d_mass(d_type(i));
Few<double,3> x_i;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
l_cmx += unwrapKK[0] * massone;
l_cmy += unwrapKK[1] * massone;
l_cmz += unwrapKK[2] * massone;
}
}, cmone[0], cmone[1], cmone[2]);
}
MPI_Allreduce(cmone, xcm, 3, MPI_DOUBLE, MPI_SUM, world);
if (masstotal > 0.0) {
xcm[0] /= masstotal;
xcm[1] /= masstotal;
xcm[2] /= masstotal;
}
}
/* ----------------------------------------------------------------------
compute the center-of-mass velocity of group of atoms
masstotal = total mass
return center-of-mass velocity in vcm[]
------------------------------------------------------------------------- */
template<class DeviceType>
void vcm_kk(int igroup, double masstotal, double *vcm)
{
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
int groupbit = bitmask[igroup];
auto d_v = atomKK->k_v.template view<DeviceType>();
auto d_mask = atomKK->k_mask.template view<DeviceType>();
auto d_image = atomKK->k_image.template view<DeviceType>();
double p[3] = {0.0, 0.0, 0.0};
if (atomKK->rmass) {
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
if (d_mask(i) & groupbit) {
double massone = d_rmass(i);
l_px += d_v(i,0) * massone;
l_py += d_v(i,1) * massone;
l_pz += d_v(i,2) * massone;
}
}, p[0], p[1], p[2]);
} else {
auto d_mass = atomKK->k_mass.template view<DeviceType>();
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
atomKK->k_mass.template sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
if (d_mask(i) & groupbit) {
double massone = d_mass(d_type(i));
l_px += d_v(i,0) * massone;
l_py += d_v(i,1) * massone;
l_pz += d_v(i,2) * massone;
}
}, p[0], p[1], p[2]);
}
MPI_Allreduce(p, vcm, 3, MPI_DOUBLE, MPI_SUM, world);
if (masstotal > 0.0) {
vcm[0] /= masstotal;
vcm[1] /= masstotal;
vcm[2] /= masstotal;
}
}
/* ----------------------------------------------------------------------
compute the angular momentum L (lmom) of group
around center-of-mass cm
must unwrap atoms to compute L correctly
------------------------------------------------------------------------- */
template<class DeviceType>
void angmom_kk(int igroup, double *xcm, double *lmom)
{
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
int groupbit = bitmask[igroup];
auto d_x = atomKK->k_x.template view<DeviceType>();
auto d_v = atomKK->k_v.template view<DeviceType>();
auto d_mask = atomKK->k_mask.template view<DeviceType>();
auto d_image = atomKK->k_image.template view<DeviceType>();
auto l_prd = Few<double, 3>(domain->prd);
auto l_h = Few<double, 6>(domain->h);
auto l_triclinic = domain->triclinic;
auto l_xcm0 = xcm[0];
auto l_xcm1 = xcm[1];
auto l_xcm2 = xcm[2];
double p[3] = {0.0, 0.0, 0.0};
if (atomKK->rmass) {
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
if (d_mask(i) & groupbit) {
double massone = d_rmass(i);
Few<double,3> x_i;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
double dx = unwrapKK[0] - l_xcm0;
double dy = unwrapKK[1] - l_xcm1;
double dz = unwrapKK[2] - l_xcm2;
l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1));
l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2));
l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0));
}
}, p[0], p[1], p[2]);
} else {
auto d_mass = atomKK->k_mass.template view<DeviceType>();
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
atomKK->k_mass.template sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
if (d_mask(i) & groupbit) {
double massone = d_mass(d_type(i));
Few<double,3> x_i;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
double dx = unwrapKK[0] - l_xcm0;
double dy = unwrapKK[1] - l_xcm1;
double dz = unwrapKK[2] - l_xcm2;
l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1));
l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2));
l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0));
}
}, p[0], p[1], p[2]);
}
MPI_Allreduce(p, lmom, 3, MPI_DOUBLE, MPI_SUM, world);
}
/* ----------------------------------------------------------------------
compute moment of inertia tensor around center-of-mass xcm of group
must unwrap atoms to compute itensor correctly
------------------------------------------------------------------------- */
template<class DeviceType>
void inertia_kk(int igroup, double *xcm, double itensor[3][3])
{
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
int groupbit = bitmask[igroup];
auto d_x = atomKK->k_x.template view<DeviceType>();
auto d_mask = atomKK->k_mask.template view<DeviceType>();
auto d_image = atomKK->k_image.template view<DeviceType>();
auto l_prd = Few<double, 3>(domain->prd);
auto l_h = Few<double, 6>(domain->h);
auto l_triclinic = domain->triclinic;
auto l_xcm0 = xcm[0];
auto l_xcm1 = xcm[1];
auto l_xcm2 = xcm[2];
double ione[3][3];
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++) ione[i][j] = 0.0;
if (atomKK->rmass) {
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) {
if (d_mask(i) & groupbit) {
double massone = d_rmass(i);
Few<double,3> x_i;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
double dx = unwrapKK[0] - l_xcm0;
double dy = unwrapKK[1] - l_xcm1;
double dz = unwrapKK[2] - l_xcm2;
l_i00 += massone * (dy * dy + dz * dz);
l_i11 += massone * (dx * dx + dz * dz);
l_i22 += massone * (dx * dx + dy * dy);
l_i01 -= massone * dx * dy;
l_i12 -= massone * dy * dz;
l_i02 -= massone * dx * dz;
}
}, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]);
} else {
auto d_mass = atomKK->k_mass.template view<DeviceType>();
auto d_type = atomKK->k_type.template view<DeviceType>();
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
atomKK->k_mass.template sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) {
if (d_mask(i) & groupbit) {
double massone = d_mass(d_type(i));
Few<double,3> x_i;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
double dx = unwrapKK[0] - l_xcm0;
double dy = unwrapKK[1] - l_xcm1;
double dz = unwrapKK[2] - l_xcm2;
l_i00 += massone * (dy * dy + dz * dz);
l_i11 += massone * (dx * dx + dz * dz);
l_i22 += massone * (dx * dx + dy * dy);
l_i01 -= massone * dx * dy;
l_i12 -= massone * dy * dz;
l_i02 -= massone * dx * dz;
}
}, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]);
}
ione[1][0] = ione[0][1];
ione[2][1] = ione[1][2];
ione[2][0] = ione[0][2];
MPI_Allreduce(&ione[0][0], &itensor[0][0], 9, MPI_DOUBLE, MPI_SUM, world);
}
private:
ExecutionSpace execution_space;
};
} // namespace LAMMPS_NS

View File

@ -74,14 +74,14 @@ void ImproperHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
// reallocate per-atom arrays if necessary
if (eflag_atom) {
if(k_eatom.extent(0) < maxeatom) {
if ((int)k_eatom.extent(0) < maxeatom) {
memoryKK->destroy_kokkos(k_eatom,eatom);
memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom");
d_eatom = k_eatom.template view<KKDeviceType>();
} else Kokkos::deep_copy(d_eatom,0.0);
}
if (vflag_atom) {
if(k_vatom.extent(0) < maxvatom) {
if ((int)k_vatom.extent(0) < maxvatom) {
memoryKK->destroy_kokkos(k_vatom,vatom);
memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"improper:vatom");
d_vatom = k_vatom.template view<KKDeviceType>();

View File

@ -77,7 +77,7 @@ void ImproperHybridKokkos::compute(int eflag, int vflag)
Kokkos::parallel_for(nimproperlist_orig,LAMMPS_LAMBDA(int i) {
const int m = d_map[d_improperlist_orig(i,4)];
if (m >= 0) Kokkos::atomic_increment(&d_nimproperlist[m]);
if (m >= 0) Kokkos::atomic_inc(&d_nimproperlist[m]);
});
k_nimproperlist.modify_device();
@ -88,7 +88,7 @@ void ImproperHybridKokkos::compute(int eflag, int vflag)
if (h_nimproperlist[m] > maximproper_all)
maximproper_all = h_nimproperlist[m] + EXTRA;
if (k_improperlist.d_view.extent(1) < maximproper_all)
if ((int)k_improperlist.d_view.extent(1) < maximproper_all)
MemKK::realloc_kokkos(k_improperlist, "improper_hybrid:improperlist", nstyles, maximproper_all, 5);
auto d_improperlist = k_improperlist.d_view;

View File

@ -221,15 +221,19 @@ TYPE create_kokkos(TYPE &data, typename TYPE::value_type ***&array,
int n1, int n2, int n3, const char *name)
{
data = TYPE(std::string(name),n1,n2,n3);
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1;
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1 * n2;
typename TYPE::value_type **plane = (typename TYPE::value_type **) smalloc(nbytes,name);
nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1;
array = (typename TYPE::value_type ***) smalloc(nbytes,name);
bigint m;
for (int i = 0; i < n1; i++) {
if (n2 == 0) {
array[i] = nullptr;
} else {
nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n2;
array[i] = (typename TYPE::value_type **) smalloc(nbytes,name);
m = ((bigint) i) * n2;
array[i] = &plane[m];
for (int j = 0; j < n2; j++) {
if (n3 == 0)
array[i][j] = nullptr;
@ -248,15 +252,19 @@ template <typename TYPE, typename HTYPE>
{
data = TYPE(std::string(name),n1,n2);
h_data = Kokkos::create_mirror_view(data);
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1;
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1 * n2;
typename TYPE::value_type **plane = (typename TYPE::value_type **) smalloc(nbytes,name);
nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1;
array = (typename TYPE::value_type ***) smalloc(nbytes,name);
bigint m;
for (int i = 0; i < n1; i++) {
if (n2 == 0) {
array[i] = nullptr;
} else {
nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n2;
array[i] = (typename TYPE::value_type **) smalloc(nbytes,name);
m = ((bigint) i) * n2;
array[i] = &plane[m];
for (int j = 0; j < n2; j++) {
if (n3 == 0)
array[i][j] = nullptr;
@ -288,15 +296,19 @@ TYPE grow_kokkos(TYPE &data, typename TYPE::value_type ***&array,
{
if (array == nullptr) return create_kokkos(data,array,n1,n2,n3,name);
data.resize(n1,n2,n3);
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1;
array = (typename TYPE::value_type ***) smalloc(nbytes,name);
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1 * n2;
typename TYPE::value_type **plane = (typename TYPE::value_type **) srealloc(array[0],nbytes,name);
nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1;
array = (typename TYPE::value_type ***) srealloc(array,nbytes,name);
bigint m;
for (int i = 0; i < n1; i++) {
if (n2 == 0) {
array[i] = nullptr;
} else {
nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n2;
array[i] = (typename TYPE::value_type **) smalloc(nbytes,name);
m = ((bigint) i) * n2;
array[i] = &plane[m];
for (int j = 0; j < n2; j++) {
if (n3 == 0)
array[i][j] = nullptr;
@ -316,10 +328,9 @@ template <typename TYPE>
void destroy_kokkos(TYPE data, typename TYPE::value_type*** &array)
{
if (array == nullptr) return;
int n1 = data.extent(0);
for (int i = 0; i < n1; ++i)
sfree(array[i]);
data = TYPE();
sfree(array[0]);
sfree(array);
array = nullptr;
}
@ -411,7 +422,7 @@ template <typename TYPE>
TYPE create_kokkos(TYPE &data, int n1, int n2, int n3, int n4, int n5 , int n6 ,const char *name)
{
data = TYPE();
data = TYPE(std::string(name) ,n1,n2,n3,n4,n5,n6);
data = TYPE(std::string(name),n1,n2,n3,n4,n5,n6);
return data;
}
@ -420,4 +431,3 @@ TYPE create_kokkos(TYPE &data, int n1, int n2, int n3, int n4, int n5 , int n6 ,
}
#endif

View File

@ -145,13 +145,13 @@ void MLIAPDataKokkos<DeviceType>::generate_neighdata(class NeighList *list_in, i
auto type = atomKK->k_type.view<DeviceType>();
auto map=k_pairmliap->k_map.template view<DeviceType>();
Kokkos::parallel_scan(natomneigh, KOKKOS_LAMBDA (int ii, int &update, const bool final) {
Kokkos::parallel_scan(Kokkos::RangePolicy<DeviceType>(0,natomneigh), KOKKOS_LAMBDA (int ii, int &update, const bool final) {
if (final)
d_ij(ii) = update;
update += d_numneighs(ii);
});
Kokkos::parallel_for(natomneigh, KOKKOS_LAMBDA (int ii) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,natomneigh), KOKKOS_LAMBDA (int ii) {
int ij = d_ij(ii);
const int i = d_ilist[ii];
const double xtmp = x(i, 0);
@ -183,7 +183,7 @@ void MLIAPDataKokkos<DeviceType>::generate_neighdata(class NeighList *list_in, i
d_ielems[ii] = ielem;
});
Kokkos::parallel_for(nmax, KOKKOS_LAMBDA (int i) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nmax), KOKKOS_LAMBDA (int i) {
const int itype = type(i);
d_elems(i) = map(itype);
});
@ -225,7 +225,7 @@ void MLIAPDataKokkos<DeviceType>::grow_neigharrays() {
auto d_cutsq=k_pairmliap->k_cutsq.template view<DeviceType>();
auto h_cutsq=k_pairmliap->k_cutsq.template view<LMPHostType>();
auto d_numneighs = k_numneighs.template view<DeviceType>();
Kokkos::parallel_reduce(natomneigh, KOKKOS_LAMBDA (int ii, int &contrib) {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,natomneigh), KOKKOS_LAMBDA (int ii, int &contrib) {
const int i = d_ilist[ii];
int count=0;
const double xtmp = x(i, 0);

View File

@ -75,7 +75,7 @@ void MLIAPDescriptorSO3Kokkos<DeviceType>::compute_forces(class MLIAPData *data_
Kokkos::View<double[6], DeviceType> virial("virial");
data->k_pairmliap->k_vatom.template modify<LMPHostType>();
data->k_pairmliap->k_vatom.template sync<DeviceType>();
Kokkos::parallel_for(data->nlistatoms, KOKKOS_LAMBDA(int ii) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,data->nlistatoms), KOKKOS_LAMBDA(int ii) {
double fij[3];
const int i = d_iatoms(ii);
@ -187,7 +187,7 @@ void MLIAPDescriptorSO3Kokkos<DeviceType>::compute_force_gradients(class MLIAPDa
auto yoffset = data->yoffset, zoffset = data->zoffset, gamma_nnz = data->gamma_nnz;
Kokkos::parallel_for (data->nlistatoms, KOKKOS_LAMBDA (int ii) {
Kokkos::parallel_for (Kokkos::RangePolicy<DeviceType>(0,data->nlistatoms), KOKKOS_LAMBDA (int ii) {
const int i = d_iatoms(ii);
// ensure rij, inside, wj, and rcutij are of size jnum

View File

@ -147,7 +147,7 @@ void PairMEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
auto l_numneigh_half = d_numneigh_half;
auto l_offset = d_offset;
Kokkos::parallel_scan(inum_half, LAMMPS_LAMBDA(int ii, int &m_fill, bool final) {
Kokkos::parallel_scan(Kokkos::RangePolicy<DeviceType>(0,inum_half), LAMMPS_LAMBDA(int ii, int &m_fill, bool final) {
int i = l_ilist_half[ii];
m_fill += l_numneigh_half[i];
if (final)

View File

@ -302,7 +302,7 @@ void PairMLIAPKokkos<DeviceType>::e_tally(MLIAPData* data)
auto d_iatoms = k_data->k_iatoms.template view<DeviceType>();
auto d_eatoms = k_data->k_eatoms.template view<DeviceType>();
auto d_eatom = k_eatom.template view<DeviceType>();
Kokkos::parallel_for(data->nlistatoms, KOKKOS_LAMBDA (int ii) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,data->nlistatoms), KOKKOS_LAMBDA (int ii) {
d_eatom(d_iatoms(ii)) = d_eatoms(ii);
});
k_eatom.modify<DeviceType>();

View File

@ -532,7 +532,7 @@ int PairPODKokkos<DeviceType>::NeighborCount(t_pod_1i l_numij, double l_rcutsq,
auto l_neighbors = d_neighbors;
// compute number of pairs for each atom i
Kokkos::parallel_for("NeighborCount", Kokkos::TeamPolicy<>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) {
Kokkos::parallel_for("NeighborCount", typename Kokkos::TeamPolicy<DeviceType>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team) {
int i = team.league_rank();
int gi = l_ilist(gi1 + i);
double xi0 = l_x(gi, 0);
@ -555,7 +555,7 @@ int PairPODKokkos<DeviceType>::NeighborCount(t_pod_1i l_numij, double l_rcutsq,
});
// accumalative sum
Kokkos::parallel_scan("InclusivePrefixSum", Ni + 1, KOKKOS_LAMBDA(int i, int& update, const bool final) {
Kokkos::parallel_scan("InclusivePrefixSum", Kokkos::RangePolicy<DeviceType>(0,Ni + 1), KOKKOS_LAMBDA(int i, int& update, const bool final) {
if (i > 0) {
update += l_numij(i);
if (final) {
@ -582,7 +582,7 @@ void PairPODKokkos<DeviceType>::NeighborList(t_pod_1d l_rij, t_pod_1i l_numij,
auto l_map = d_map;
auto l_type = type;
Kokkos::parallel_for("NeighborList", Kokkos::TeamPolicy<>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) {
Kokkos::parallel_for("NeighborList", typename Kokkos::TeamPolicy<DeviceType>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team) {
int i = team.league_rank();
int gi = l_ilist(gi1 + i);
double xi0 = l_x(gi, 0);
@ -622,7 +622,7 @@ void PairPODKokkos<DeviceType>::radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod
t_pod_1d l_rij, t_pod_1d l_besselparams, double l_rin, double l_rmax, int l_besseldegree,
int l_inversedegree, int l_nbesselpars, int Nij)
{
Kokkos::parallel_for("ComputeRadialBasis", Nij, KOKKOS_LAMBDA(int n) {
Kokkos::parallel_for("ComputeRadialBasis", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int n) {
double xij1 = l_rij(0+3*n);
double xij2 = l_rij(1+3*n);
double xij3 = l_rij(2+3*n);
@ -722,7 +722,7 @@ void PairPODKokkos<DeviceType>::radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod
template<class DeviceType>
void PairPODKokkos<DeviceType>::matrixMultiply(t_pod_1d a, t_pod_1d b, t_pod_1d c, int r1, int c1, int c2)
{
Kokkos::parallel_for("MatrixMultiply", r1 * c2, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("MatrixMultiply", Kokkos::RangePolicy<DeviceType>(0,r1 * c2), KOKKOS_LAMBDA(int idx) {
int j = idx / r1; // Calculate column index
int i = idx % r1; // Calculate row index
double sum = 0.0;
@ -737,7 +737,7 @@ template<class DeviceType>
void PairPODKokkos<DeviceType>::angularbasis(t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz,
t_pod_1d l_rij, t_pod_1i l_pq3, int l_K3, int N)
{
Kokkos::parallel_for("AngularBasis", N, KOKKOS_LAMBDA(int j) {
Kokkos::parallel_for("AngularBasis", Kokkos::RangePolicy<DeviceType>(0,N), KOKKOS_LAMBDA(int j) {
double x = l_rij(j*3 + 0);
double y = l_rij(j*3 + 1);
double z = l_rij(j*3 + 2);
@ -817,7 +817,7 @@ void PairPODKokkos<DeviceType>::radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf
{
int totalIterations = l_nrbf3 * l_K3 * Ni;
if (l_nelements==1) {
Kokkos::parallel_for("RadialAngularSum", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("RadialAngularSum", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int k = idx % l_K3;
int temp = idx / l_K3;
int m = temp % l_nrbf3;
@ -835,7 +835,7 @@ void PairPODKokkos<DeviceType>::radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf
});
}
else {
Kokkos::parallel_for("RadialAngularSum", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("RadialAngularSum", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int k = idx % l_K3;
int temp = idx / l_K3;
int m = temp % l_nrbf3;
@ -863,7 +863,7 @@ void PairPODKokkos<DeviceType>::twobodydesc(t_pod_1d d2, t_pod_1d l_rbf, t_pod_
int l_nrbf2, const int Ni, const int Nij)
{
int totalIterations = l_nrbf2 * Nij;
Kokkos::parallel_for("twobodydesc", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("twobodydesc", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int n = idx / l_nrbf2; // pair index
int m = idx % l_nrbf2; // rbd index
int i2 = n + Nij * m; // Index of the radial basis function for atom n and RBF m
@ -876,7 +876,7 @@ void PairPODKokkos<DeviceType>::twobody_forces(t_pod_1d fij, t_pod_1d cb2, t_pod
t_pod_1d l_rbfz, t_pod_1i l_idxi, t_pod_1i l_tj, int l_nrbf2, const int Ni, const int Nij)
{
int totalIterations = l_nrbf2 * Nij;
Kokkos::parallel_for("twobody_forces", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("twobody_forces", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int n = idx / l_nrbf2; // pair index
int m = idx % l_nrbf2; // rbd index
int i2 = n + Nij * m; // Index of the radial basis function for atom n and RBF m
@ -893,7 +893,7 @@ void PairPODKokkos<DeviceType>::threebodydesc(t_pod_1d d3, t_pod_1d l_sumU, t_po
int l_nelements, int l_nrbf3, int l_nabf3, int l_K3, const int Ni)
{
int totalIterations = l_nrbf3 * Ni;
Kokkos::parallel_for("ThreeBodyDesc", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("ThreeBodyDesc", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int m = idx % l_nrbf3;
int i = idx / l_nrbf3;
int nmi = l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3*i;
@ -925,7 +925,7 @@ void PairPODKokkos<DeviceType>::threebody_forces(t_pod_1d fij, t_pod_1d cb3, t_p
{
int totalIterations = l_nrbf3 * Nij;
if (l_nelements==1) {
Kokkos::parallel_for("threebody_forces1", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("threebody_forces1", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int j = idx / l_nrbf3; // Calculate j using integer division
int m = idx % l_nrbf3; // Calculate m using modulo operation
int idxR = j + Nij * m; // Pre-compute the index for rbf
@ -961,7 +961,7 @@ void PairPODKokkos<DeviceType>::threebody_forces(t_pod_1d fij, t_pod_1d cb3, t_p
}
else {
int N3 = Ni * l_nabf3 * l_nrbf3;
Kokkos::parallel_for("threebody_forces2", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("threebody_forces2", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int j = idx / l_nrbf3; // Derive the original j value
int m = idx % l_nrbf3; // Derive the original m value
int i2 = l_tj(j) - 1;
@ -1007,7 +1007,7 @@ void PairPODKokkos<DeviceType>::threebody_forcecoeff(t_pod_1d fb3, t_pod_1d cb3,
{
int totalIterations = l_nrbf3 * Ni;
if (l_nelements==1) {
Kokkos::parallel_for("threebody_forcecoeff1", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("threebody_forcecoeff1", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int i = idx / l_nrbf3; // Calculate j using integer division
int m = idx % l_nrbf3; // Calculate m using modulo operation
for (int p = 0; p < l_nabf3; p++) {
@ -1024,7 +1024,7 @@ void PairPODKokkos<DeviceType>::threebody_forcecoeff(t_pod_1d fb3, t_pod_1d cb3,
}
else {
int N3 = Ni * l_nabf3 * l_nrbf3;
Kokkos::parallel_for("threebody_forcecoeff2", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("threebody_forcecoeff2", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int i = idx / l_nrbf3; // Derive the original j value
int m = idx % l_nrbf3; // Derive the original m value
for (int p = 0; p < l_nabf3; p++) {
@ -1054,7 +1054,7 @@ void PairPODKokkos<DeviceType>::fourbodydesc(t_pod_1d d4, t_pod_1d l_sumU, t_po
t_pod_1i l_pc4, int l_nelements, int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni)
{
int totalIterations = l_nrbf4 * Ni;
Kokkos::parallel_for("fourbodydesc", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("fourbodydesc", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int m = idx % l_nrbf4;
int i = idx / l_nrbf4;
int idxU = l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * i;
@ -1092,7 +1092,7 @@ void PairPODKokkos<DeviceType>::fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_po
{
int totalIterations = l_nrbf4 * Nij;
if (l_nelements==1) {
Kokkos::parallel_for("fourbody_forces1", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("fourbody_forces1", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int j = idx / l_nrbf4; // Derive the original j value
int m = idx % l_nrbf4; // Derive the original m value
int idxU = l_K3 * m + l_K3*l_nrbf3*l_idxi(j);
@ -1151,7 +1151,7 @@ void PairPODKokkos<DeviceType>::fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_po
}
else {
int N3 = Ni * l_nabf4 * l_nrbf4;
Kokkos::parallel_for("fourbody_forces2", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("fourbody_forces2", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int j = idx / l_nrbf4; // Derive the original j value
int m = idx % l_nrbf4; // Derive the original m value
int idxM = j + Nij * m;
@ -1241,7 +1241,7 @@ void PairPODKokkos<DeviceType>::fourbody_forcecoeff(t_pod_1d fb4, t_pod_1d cb4,
{
int totalIterations = l_nrbf4 * Ni;
if (l_nelements==1) {
Kokkos::parallel_for("fourbody_forcecoeff1", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("fourbody_forcecoeff1", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int i = idx / l_nrbf4; // Derive the original j value
int m = idx % l_nrbf4; // Derive the original m value
int idxU = l_K3 * m + l_K3*l_nrbf3*i;
@ -1268,7 +1268,7 @@ void PairPODKokkos<DeviceType>::fourbody_forcecoeff(t_pod_1d fb4, t_pod_1d cb4,
}
else {
int N3 = Ni * l_nabf4 * l_nrbf4;
Kokkos::parallel_for("fourbody_forcecoeff2", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("fourbody_forcecoeff2", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int i = idx / l_nrbf4; // Derive the original j value
int m = idx % l_nrbf4; // Derive the original m value
for (int p = 0; p < l_nabf4; p++) {
@ -1311,7 +1311,7 @@ void PairPODKokkos<DeviceType>::allbody_forces(t_pod_1d fij, t_pod_1d l_forcecoe
t_pod_1i l_idxi, t_pod_1i l_tj, int l_nelements, int l_nrbf3, int l_K3, int Nij)
{
int totalIterations = l_nrbf3 * Nij;
Kokkos::parallel_for("allbody_forces", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("allbody_forces", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int j = idx / l_nrbf3; // Calculate j using integer division
int m = idx % l_nrbf3; // Calculate m using modulo operation
int i2 = l_tj(j) - 1;
@ -1346,7 +1346,7 @@ template<class DeviceType>
void PairPODKokkos<DeviceType>::crossdesc(t_pod_1d d12, t_pod_1d d1, t_pod_1d d2, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni)
{
int totalIterations = n12 * Ni;
Kokkos::parallel_for("crossdesc", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("crossdesc", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int n = idx % Ni;
int i = idx / Ni;
@ -1359,7 +1359,7 @@ void PairPODKokkos<DeviceType>::crossdesc_reduction(t_pod_1d cb1, t_pod_1d cb2,
t_pod_1d d2, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni)
{
int totalIterations = n12 * Ni;
Kokkos::parallel_for("crossdesc_reduction", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("crossdesc_reduction", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int n = idx % Ni; // Ni
int m = idx / Ni; // n12
int k1 = ind1(m); // dd1
@ -1375,7 +1375,7 @@ void PairPODKokkos<DeviceType>::crossdesc_reduction(t_pod_1d cb1, t_pod_1d cb2,
template<class DeviceType>
void PairPODKokkos<DeviceType>::set_array_to_zero(t_pod_1d a, int N)
{
Kokkos::parallel_for("initialize_array", N, KOKKOS_LAMBDA(int i) {
Kokkos::parallel_for("initialize_array", Kokkos::RangePolicy<DeviceType>(0,N), KOKKOS_LAMBDA(int i) {
a(i) = 0.0;
});
}
@ -1480,7 +1480,7 @@ void PairPODKokkos<DeviceType>::blockatom_base_coefficients(t_pod_1d ei, t_pod_1
int nDes = Mdesc;
int nCoeff = nCoeffPerElement;
Kokkos::parallel_for("atomic_energies", Ni, KOKKOS_LAMBDA(int n) {
Kokkos::parallel_for("atomic_energies", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int n) {
int nc = nCoeff*(tyai[n]-1);
ei[n] = cefs[0 + nc];
for (int m=0; m<nDes; m++)
@ -1488,7 +1488,7 @@ void PairPODKokkos<DeviceType>::blockatom_base_coefficients(t_pod_1d ei, t_pod_1
});
int totalIterations = Ni*nDes;
Kokkos::parallel_for("base_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("base_coefficients", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int n = idx % Ni;
int m = idx / Ni;
int nc = nCoeff*(tyai[n]-1);
@ -1516,7 +1516,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
int nCoeff = nCoeffPerElement;
int totalIterations = Ni*nCom;
Kokkos::parallel_for("pca", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("pca", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int i = idx % Ni;
int k = idx / Ni;
double sum = 0.0;
@ -1528,7 +1528,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
});
totalIterations = Ni*nCls;
Kokkos::parallel_for("inverse_square_distances", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("inverse_square_distances", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int i = idx % Ni;
int j = idx / Ni;
int typei = tyai[i]-1;
@ -1541,14 +1541,14 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
D[i + Ni*j] = 1.0 / sum;
});
Kokkos::parallel_for("Probabilities", Ni, KOKKOS_LAMBDA(int i) {
Kokkos::parallel_for("Probabilities", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int i) {
double sum = 0;
for (int j = 0; j < nCls; j++) sum += D[i + Ni*j];
sumD[i] = sum;
for (int j = 0; j < nCls; j++) P[i + Ni*j] = D[i + Ni*j]/sum;
});
Kokkos::parallel_for("atomic_energies", Ni, KOKKOS_LAMBDA(int n) {
Kokkos::parallel_for("atomic_energies", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int n) {
int nc = nCoeff*(tyai[n]-1);
ei[n] = cefs[0 + nc];
for (int k = 0; k<nCls; k++)
@ -1556,7 +1556,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
ei[n] += cefs[1 + m + nDes*k + nc]*B[n + Ni*m]*P[n + Ni*k];
});
Kokkos::parallel_for("env_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("env_coefficients", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int n = idx % Ni;
int k = idx / Ni;
int nc = nCoeff*(tyai[n]-1);
@ -1567,7 +1567,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
});
totalIterations = Ni*nDes;
Kokkos::parallel_for("base_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("base_coefficients", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int n = idx % Ni;
int m = idx / Ni;
int nc = nCoeff*(tyai[n]-1);
@ -1577,7 +1577,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
cb[n + Ni*m] = sum;
});
Kokkos::parallel_for("base_env_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) {
Kokkos::parallel_for("base_env_coefficients", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
int i = idx % Ni;
int m = idx / Ni;
int typei = tyai[i]-1;
@ -1670,7 +1670,7 @@ template<class DeviceType>
void PairPODKokkos<DeviceType>::tallyforce(t_pod_1d l_fij, t_pod_1i l_ai, t_pod_1i l_aj, int Nij)
{
auto l_f = f;
Kokkos::parallel_for("TallyForce", Nij, KOKKOS_LAMBDA(int n) {
Kokkos::parallel_for("TallyForce", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int n) {
int im = l_ai(n);
int jm = l_aj(n);
int n3 = 3*n;
@ -1694,7 +1694,7 @@ void PairPODKokkos<DeviceType>::tallyenergy(t_pod_1d l_ei, int istart, int Ni)
// For global energy tally
if (eflag_global) {
double local_eng_vdwl = 0.0;
Kokkos::parallel_reduce("GlobalEnergyTally", Ni, KOKKOS_LAMBDA(int k, E_FLOAT& update) {
Kokkos::parallel_reduce("GlobalEnergyTally", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int k, E_FLOAT& update) {
update += l_ei(k);
}, local_eng_vdwl);
@ -1704,7 +1704,7 @@ void PairPODKokkos<DeviceType>::tallyenergy(t_pod_1d l_ei, int istart, int Ni)
// For per-atom energy tally
if (eflag_atom) {
Kokkos::parallel_for("PerAtomEnergyTally", Ni, KOKKOS_LAMBDA(int k) {
Kokkos::parallel_for("PerAtomEnergyTally", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int k) {
l_eatom(istart + k) += l_ei(k);
});
}
@ -1718,7 +1718,7 @@ void PairPODKokkos<DeviceType>::tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_po
if (vflag_global) {
for (int j=0; j<3; j++) {
F_FLOAT sum = 0.0;
Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) {
Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) {
int k3 = 3*k;
update += l_rij(j + k3) * l_fij(j + k3);
}, sum);
@ -1726,21 +1726,21 @@ void PairPODKokkos<DeviceType>::tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_po
}
F_FLOAT sum = 0.0;
Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) {
Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) {
int k3 = 3*k;
update += l_rij(k3) * l_fij(1 + k3);
}, sum);
virial[3] -= sum;
sum = 0.0;
Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) {
Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) {
int k3 = 3*k;
update += l_rij(k3) * l_fij(2 + k3);
}, sum);
virial[4] -= sum;
sum = 0.0;
Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) {
Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) {
int k3 = 3*k;
update += l_rij(1+k3) * l_fij(2+k3);
}, sum);
@ -1748,7 +1748,7 @@ void PairPODKokkos<DeviceType>::tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_po
}
if (vflag_atom) {
Kokkos::parallel_for("PerAtomStressTally", Nij, KOKKOS_LAMBDA(int k) {
Kokkos::parallel_for("PerAtomStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k) {
int i = l_ai(k);
int j = l_aj(k);
int k3 = 3*k;

View File

@ -385,13 +385,13 @@ void PairReaxFFKokkos<DeviceType>::init_md()
swb = api->control->nonb_cut;
enobondsflag = api->control->enobondsflag;
if (fabs(swa) > 0.01)
error->warning(FLERR,"Warning: non-zero lower Taper-radius cutoff");
if ((fabs(swa) > 0.01) && (comm->me == 0))
error->warning(FLERR, "Non-zero lower Taper-radius cutoff");
if (swb < 0)
error->one(FLERR,"Negative upper Taper-radius cutoff");
else if (swb < 5)
error->one(FLERR,"Warning: very low Taper-radius cutoff: {}\n", swb);
if (swb < 0.0) {
error->all(FLERR,"Negative upper Taper-radius cutoff");
} else if ((swb < 5.0) && (comm->me ==0))
error->warning(FLERR,"Very low Taper-radius cutoff: {}\n", swb);
d1 = swb - swa;
d7 = powint(d1,7);

View File

@ -39,7 +39,7 @@ RandPoolWrap::~RandPoolWrap()
void RandPoolWrap::destroy()
{
if (random_thr) {
for (int i=1; i < nthreads; ++i)
for (int i = 1; i < nthreads; ++i)
delete random_thr[i];
delete[] random_thr;
@ -51,7 +51,7 @@ void RandPoolWrap::init(RanMars* random, int seed)
{
// deallocate pool of RNGs
if (random_thr) {
for (int i=1; i < this->nthreads; ++i)
for (int i = 1; i < nthreads; ++i)
delete random_thr[i];
delete[] random_thr;

View File

@ -59,10 +59,12 @@ class RandPoolWrap : protected Pointers {
typedef Kokkos::Experimental::UniqueToken<
LMPHostType, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type;
#ifndef LMP_KOKKOS_GPU
unique_token_type unique_token;
int tid = (int) unique_token.acquire();
rand_wrap.rng = random_thr[tid];
unique_token.release(tid);
#endif
return rand_wrap;
}

View File

@ -63,10 +63,9 @@ class RegSphereKokkos : public RegSphere, public KokkosBase {
double xs, ys, zs;
double xnear[3], xorig[3];
if (dynamic) {
xorig[0] = x; xorig[1] = y; xorig[2] = z;
xorig[0] = x; xorig[1] = y; xorig[2] = z;
if (dynamic)
inverse_transform(x, y, z);
}
xnear[0] = x; xnear[1] = y; xnear[2] = z;

View File

@ -53,6 +53,8 @@ lmpinstalledpkgs.h
lmpgitversion.h
mliap_model_python_couple.cpp
mliap_model_python_couple.h
# removed in Dec 2024
group_kokkos.cpp
# renamed in September 2024
group_ndx.cpp
group_ndx.h

View File

@ -80,15 +80,14 @@ namespace ReaxFF {
swa = control->nonb_low;
swb = control->nonb_cut;
if (fabs(swa) > 0.01 && control->me == 0)
if ((fabs(swa) > 0.01) && (control->me == 0))
error->warning(FLERR, "Non-zero lower Taper-radius cutoff");
if (swb < 0) {
if (swb < 0.0) {
error->all(FLERR,"Negative upper Taper-radius cutoff");
}
else if (swb < 5 && control->me == 0)
error->warning(FLERR,fmt::format("Warning: very low Taper-radius cutoff: "
"{}\n", swb));
} else if ((swb < 5.0) && (control->me == 0))
error->warning(FLERR,fmt::format("Very low Taper-radius cutoff: {}\n", swb));
d1 = swb - swa;
d7 = pow(d1, 7.0);
swa2 = SQR(swa);

View File

@ -23,6 +23,7 @@
#include "comm_kokkos.h" // IWYU pragma: export
#include "comm_tiled_kokkos.h" // IWYU pragma: export
#include "domain_kokkos.h" // IWYU pragma: export
#include "group_kokkos.h" // IWYU pragma: export
#include "kokkos.h" // IWYU pragma: export
#include "memory_kokkos.h" // IWYU pragma: export
#include "modify_kokkos.h" // IWYU pragma: export
@ -39,6 +40,7 @@
#include "comm_brick.h"
#include "comm_tiled.h"
#include "domain.h"
#include "group.h"
#include "memory.h"
#include "modify.h"
#include "neighbor.h"
@ -86,6 +88,11 @@ class DomainKokkos : public Domain {
DomainKokkos(class LAMMPS *lmp) : Domain(lmp) {}
};
class GroupKokkos : public Group {
public:
GroupKokkos(class LAMMPS *lmp) : Group(lmp) {}
};
class NeighborKokkos : public Neighbor {
public:
NeighborKokkos(class LAMMPS *lmp) : Neighbor(lmp) {}

View File

@ -2507,7 +2507,8 @@ void CommTiled::deallocate_swap(int n)
memory->destroy(sendbox_multi[i]);
memory->destroy(sendbox_multiold[i]);
delete [] maxsendlist[i];
if (maxsendlist)
delete [] maxsendlist[i];
if (sendlist && sendlist[i]) {
for (int j = 0; j < nprocmax[i]; j++) memory->destroy(sendlist[i][j]);

View File

@ -153,7 +153,7 @@ class CommTiled : public Comm {
virtual void grow_swap_send(int, int, int); // grow swap arrays for send and recv
void grow_swap_send_multi(int, int); // grow multi swap arrays for send and recv
void grow_swap_recv(int, int);
void deallocate_swap(int); // deallocate swap arrays
void deallocate_swap(int); // deallocate swap arrays
};
} // namespace LAMMPS_NS

View File

@ -264,6 +264,8 @@ class Fix : protected Pointers {
virtual double memory_usage() { return 0.0; }
void set_copymode(int value) { copymode = value; }
protected:
int instance_me; // which Fix class instantiation I am

View File

@ -191,6 +191,8 @@ FixLangevin::FixLangevin(LAMMPS *lmp, int narg, char **arg) :
FixLangevin::~FixLangevin()
{
if (copymode) return;
delete random;
delete[] tstr;
delete[] gfactor1;
@ -509,7 +511,7 @@ void FixLangevin::post_force(int /*vflag*/)
else post_force_templated<1,0,0,0,0,0>();
else
if (gjfflag)
if (tallyflag || osflag)
if (tallyflag || osflag)
if (tbiasflag == BIAS)
if (rmass)
if (zeroflag) post_force_templated<0,1,1,1,1,1>();

View File

@ -872,7 +872,9 @@ void LAMMPS::create()
else
atom->create_avec("atomic",0,nullptr,1);
group = new Group(this);
if (kokkos) group = new GroupKokkos(this);
else group = new Group(this);
force = new Force(this); // must be after group, to create temperature
if (kokkos) modify = new ModifyKokkos(this);

View File

@ -1,6 +1,7 @@
---
lammps_version: 28 Mar 2023
date_generated: Fri Apr 7 18:04:29 2023
tags: unstable
epsilon: 7.5e-13
skip_tests: single
prerequisites: ! |

View File

@ -1,6 +1,7 @@
---
lammps_version: 28 Mar 2023
date_generated: Thu Mar 30 14:38:22 2023
tags: unstable
epsilon: 7.5e-13
skip_tests: single
prerequisites: ! |

View File

@ -1,6 +1,7 @@
---
lammps_version: 28 Mar 2023
date_generated: Fri Apr 7 18:07:13 2023
tags: unstable
epsilon: 7.5e-13
skip_tests: single
prerequisites: ! |

View File

@ -1,7 +1,7 @@
---
lammps_version: 8 Feb 2023
date_generated: Tue Mar 7 21:07:27 2023
epsilon: 2.5e-13
epsilon: 5.0e-13
skip_tests: extract
prerequisites: ! |
atom full

View File

@ -1,8 +1,7 @@
---
lammps_version: 7 Feb 2024
tags:
date_generated: Sat Apr 13 11:41:16 2024
epsilon: 5.0e-11
epsilon: 2.0e-10
skip_tests:
prerequisites: ! |
atom full

View File

@ -1,7 +1,7 @@
---
lammps_version: 29 Aug 2024
date_generated: Tue Oct 1 12:45:25 2024
epsilon: 2e-13
epsilon: 1.0e-11
skip_tests:
prerequisites: ! |
atom full

View File

@ -1,7 +1,7 @@
---
lammps_version: 29 Aug 2024
date_generated: Tue Oct 1 12:45:46 2024
epsilon: 1e-12
epsilon: 2.5e-11
skip_tests:
prerequisites: ! |
atom full

View File

@ -1,7 +1,7 @@
---
lammps_version: 17 Feb 2022
date_generated: Thu Mar 17 19:43:17 2022
epsilon: 2e-14
epsilon: 5.0e-14
skip_tests:
prerequisites: ! |
atom full

View File

@ -1,7 +1,7 @@
---
lammps_version: 17 Feb 2022
date_generated: Fri Mar 18 22:18:01 2022
epsilon: 4e-14
epsilon: 5.0e-14
skip_tests:
prerequisites: ! |
atom full

View File

@ -1,7 +1,7 @@
---
lammps_version: 8 Feb 2023
date_generated: Thu Feb 23 00:40:51 2023
epsilon: 4e-14
epsilon: 5.0e-14
skip_tests:
prerequisites: ! |
atom full

View File

@ -1,7 +1,7 @@
---
lammps_version: 27 Jun 2024
date_generated: Fri Aug 2 23:56:34 2024
epsilon: 2e-14
epsilon: 1.0e-13
skip_tests:
prerequisites: ! |
atom full

View File

@ -1,7 +1,7 @@
---
lammps_version: 8 Feb 2023
date_generated: Thu Feb 23 15:26:55 2023
epsilon: 4e-14
epsilon: 1.0e-13
skip_tests:
prerequisites: ! |
atom full

View File

@ -1,7 +1,7 @@
---
lammps_version: 8 Feb 2023
date_generated: Thu Feb 23 00:56:30 2023
epsilon: 4e-14
epsilon: 2.0e-13
skip_tests:
prerequisites: ! |
atom full

View File

@ -1,7 +1,7 @@
---
lammps_version: 8 Feb 2023
date_generated: Thu Feb 23 00:56:30 2023
epsilon: 4e-14
epsilon: 2.0e-13
skip_tests:
prerequisites: ! |
atom full

View File

@ -1,7 +1,7 @@
---
lammps_version: 17 Feb 2022
date_generated: Fri Mar 18 22:17:48 2022
epsilon: 7.5e-09
epsilon: 1.5e-08
skip_tests:
prerequisites: ! |
pair pace

View File

@ -1,7 +1,7 @@
---
lammps_version: 10 Mar 2021
date_generated: Wed Apr 7 19:30:07 2021
epsilon: 7.5e-09
epsilon: 1.5e-08
prerequisites: ! |
pair pace
pre_commands: ! |

View File

@ -1,7 +1,7 @@
---
lammps_version: 21 Nov 2023
date_generated: Thu Jan 18 11:01:50 2024
epsilon: 5e-14
epsilon: 1e-13
skip_tests: intel
prerequisites: ! |
atom full