Merge branch 'develop' into collected-small-changes
This commit is contained in:
81
.github/workflows/unittest-arm64.yml
vendored
Normal file
81
.github/workflows/unittest-arm64.yml
vendored
Normal file
@ -0,0 +1,81 @@
|
||||
# GitHub action to build LAMMPS on Linux with ARM64 and run standard unit tests
name: "Unittest for Linux on ARM64"

# Runs on every push to the develop branch and on manual dispatch.
on:
  push:
    branches: [develop]

  workflow_dispatch:

# One concurrency group per (event, workflow, ref).  The cancel expression is
# only true for pull_request events, which are not among the triggers above.
concurrency:
  group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{github.event_name == 'pull_request'}}

jobs:
  build:
    name: Linux ARM64 Unit Test
    # skip the job in forks
    if: ${{ github.repository == 'lammps/lammps' }}
    runs-on: ubuntu-22.04-arm
    env:
      CCACHE_DIR: ${{ github.workspace }}/.ccache

    steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        fetch-depth: 2

    - name: Install extra packages
      run: |
        sudo apt-get update
        sudo apt-get install -y ccache \
                                libeigen3-dev \
                                libcurl4-openssl-dev \
                                mold \
                                ninja-build \
                                python3-dev

    - name: Create Build Environment
      run: mkdir build

    # The key prefix names the architecture so that this cache cannot be
    # matched by (or shadow) a ccache archive saved by another Linux unit-test
    # workflow that caches the same path under an architecture-neutral prefix.
    - name: Set up ccache
      uses: actions/cache@v4
      with:
        path: ${{ env.CCACHE_DIR }}
        key: linux-arm64-unit-ccache-${{ github.sha }}
        restore-keys: linux-arm64-unit-ccache-

    # ccache statistics are zeroed before and printed after the build so the
    # log shows the hit rate of this run only.
    - name: Building LAMMPS via CMake
      shell: bash
      run: |
        ccache -z
        python3 -m venv linuxenv
        source linuxenv/bin/activate
        python3 -m pip install numpy
        python3 -m pip install pyyaml
        cmake -S cmake -B build \
              -C cmake/presets/gcc.cmake \
              -C cmake/presets/most.cmake \
              -D CMAKE_CXX_COMPILER_LAUNCHER=ccache \
              -D CMAKE_C_COMPILER_LAUNCHER=ccache \
              -D BUILD_SHARED_LIBS=on \
              -D DOWNLOAD_POTENTIALS=off \
              -D ENABLE_TESTING=on \
              -D MLIAP_ENABLE_ACE=on \
              -D MLIAP_ENABLE_PYTHON=off \
              -D PKG_MANIFOLD=on \
              -D PKG_ML-PACE=on \
              -D PKG_ML-RANN=on \
              -D PKG_RHEO=on \
              -D PKG_PTM=on \
              -D PKG_PYTHON=on \
              -D PKG_QTB=on \
              -D PKG_SMTBQ=on \
              -G Ninja
        cmake --build build
        ccache -s

    # -LE unstable excludes tests labelled "unstable"
    - name: Run Tests
      working-directory: build
      shell: bash
      run: ctest -V -LE unstable
|
||||
@ -117,7 +117,6 @@ set(KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/atom_vec_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/comm_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/comm_tiled_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/group_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/min_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/min_linesearch_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/neighbor_kokkos.cpp
|
||||
|
||||
@ -230,7 +230,6 @@ action fix_wall_region_kokkos.cpp
|
||||
action fix_wall_region_kokkos.h
|
||||
action grid3d_kokkos.cpp fft3d.h
|
||||
action grid3d_kokkos.h fft3d.h
|
||||
action group_kokkos.cpp
|
||||
action group_kokkos.h
|
||||
action improper_class2_kokkos.cpp improper_class2.cpp
|
||||
action improper_class2_kokkos.h improper_class2.h
|
||||
|
||||
@ -72,14 +72,14 @@ void AngleHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
// reallocate per-atom arrays if necessary
|
||||
|
||||
if (eflag_atom) {
|
||||
if(k_eatom.extent(0) < maxeatom) {
|
||||
if ((int)k_eatom.extent(0) < maxeatom) {
|
||||
memoryKK->destroy_kokkos(k_eatom,eatom);
|
||||
memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"angle:eatom");
|
||||
d_eatom = k_eatom.template view<DeviceType>();
|
||||
} else Kokkos::deep_copy(d_eatom,0.0);
|
||||
}
|
||||
if (vflag_atom) {
|
||||
if(k_vatom.extent(0) < maxvatom) {
|
||||
if ((int)k_vatom.extent(0) < maxvatom) {
|
||||
memoryKK->destroy_kokkos(k_vatom,vatom);
|
||||
memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"angle:vatom");
|
||||
d_vatom = k_vatom.template view<DeviceType>();
|
||||
|
||||
@ -76,7 +76,7 @@ void AngleHybridKokkos::compute(int eflag, int vflag)
|
||||
|
||||
Kokkos::parallel_for(nanglelist_orig,LAMMPS_LAMBDA(int i) {
|
||||
const int m = d_map[d_anglelist_orig(i,3)];
|
||||
if (m >= 0) Kokkos::atomic_increment(&d_nanglelist[m]);
|
||||
if (m >= 0) Kokkos::atomic_inc(&d_nanglelist[m]);
|
||||
});
|
||||
|
||||
k_nanglelist.modify_device();
|
||||
@ -87,7 +87,7 @@ void AngleHybridKokkos::compute(int eflag, int vflag)
|
||||
if (h_nanglelist[m] > maxangle_all)
|
||||
maxangle_all = h_nanglelist[m] + EXTRA;
|
||||
|
||||
if (k_anglelist.d_view.extent(1) < maxangle_all)
|
||||
if ((int)k_anglelist.d_view.extent(1) < maxangle_all)
|
||||
MemKK::realloc_kokkos(k_anglelist, "angle_hybrid:anglelist", nstyles, maxangle_all, 4);
|
||||
auto d_anglelist = k_anglelist.d_view;
|
||||
|
||||
|
||||
@ -67,14 +67,14 @@ void BondHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
// reallocate per-atom arrays if necessary
|
||||
|
||||
if (eflag_atom) {
|
||||
if (k_eatom.extent(0) < maxeatom) {
|
||||
if ((int)k_eatom.extent(0) < maxeatom) {
|
||||
memoryKK->destroy_kokkos(k_eatom,eatom);
|
||||
memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom");
|
||||
d_eatom = k_eatom.template view<KKDeviceType>();
|
||||
} else Kokkos::deep_copy(d_eatom,0.0);
|
||||
}
|
||||
if (vflag_atom) {
|
||||
if (k_vatom.extent(0) < maxvatom) {
|
||||
if ((int)k_vatom.extent(0) < maxvatom) {
|
||||
memoryKK->destroy_kokkos(k_vatom,vatom);
|
||||
memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"improper:vatom");
|
||||
d_vatom = k_vatom.template view<KKDeviceType>();
|
||||
|
||||
@ -76,7 +76,7 @@ void BondHybridKokkos::compute(int eflag, int vflag)
|
||||
|
||||
Kokkos::parallel_for(nbondlist_orig,LAMMPS_LAMBDA(int i) {
|
||||
const int m = d_map[d_bondlist_orig(i,2)];
|
||||
if (m >= 0) Kokkos::atomic_increment(&d_nbondlist[m]);
|
||||
if (m >= 0) Kokkos::atomic_inc(&d_nbondlist[m]);
|
||||
});
|
||||
|
||||
k_nbondlist.modify_device();
|
||||
@ -87,7 +87,7 @@ void BondHybridKokkos::compute(int eflag, int vflag)
|
||||
if (h_nbondlist[m] > maxbond_all)
|
||||
maxbond_all = h_nbondlist[m] + EXTRA;
|
||||
|
||||
if (k_bondlist.d_view.extent(1) < maxbond_all)
|
||||
if ((int)k_bondlist.d_view.extent(1) < maxbond_all)
|
||||
MemKK::realloc_kokkos(k_bondlist, "bond_hybrid:bondlist", nstyles, maxbond_all, 3);
|
||||
auto d_bondlist = k_bondlist.d_view;
|
||||
|
||||
|
||||
@ -37,6 +37,8 @@ static constexpr int BUFEXTRA = 1000;
|
||||
// Construct on top of CommTiled and start with no send lists allocated:
// both list pointers are null and the tracked processor count is zero.
CommTiledKokkos::CommTiledKokkos(LAMMPS *_lmp) : CommTiled(_lmp), nprocmaxtot(0)
{
  maxsendlist = nullptr;
  sendlist = nullptr;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -49,6 +51,8 @@ CommTiledKokkos::CommTiledKokkos(LAMMPS *_lmp) : CommTiled(_lmp)
|
||||
// Construct from an existing Comm instance via the matching CommTiled
// constructor; the send lists start out unallocated exactly as in the
// single-argument constructor.
CommTiledKokkos::CommTiledKokkos(LAMMPS *_lmp, Comm *oldcomm) :
    CommTiled(_lmp,oldcomm), nprocmaxtot(0)
{
  maxsendlist = nullptr;
  sendlist = nullptr;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -56,7 +60,9 @@ CommTiledKokkos::CommTiledKokkos(LAMMPS *_lmp, Comm *oldcomm) : CommTiled(_lmp,o
|
||||
// Release the send lists owned by this class, then null every pointer that
// was freed or must not be freed again.
CommTiledKokkos::~CommTiledKokkos()
{
  // sendlist is backed by the dual view k_sendlist; maxsendlist is a plain
  // array handled through the regular memory class
  memoryKK->destroy_kokkos(k_sendlist,sendlist);
  memory->destroy(maxsendlist);

  sendlist = nullptr;
  maxsendlist = nullptr;

  // NOTE(review): presumably cleared so that a base-class destructor does not
  // free the communication buffers a second time -- confirm against CommTiled
  buf_send = nullptr;
  buf_recv = nullptr;
}
|
||||
@ -657,12 +663,11 @@ void CommTiledKokkos::grow_list(int iswap, int iwhich, int n)
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
k_sendlist.modify<LMPHostType>();
|
||||
|
||||
if (size > (int)k_sendlist.extent(2)) {
|
||||
memoryKK->grow_kokkos(k_sendlist,sendlist,maxswap,maxsend,size,"comm:sendlist");
|
||||
memoryKK->grow_kokkos(k_sendlist,sendlist,maxswap,nprocmaxtot,size,"comm:sendlist");
|
||||
|
||||
for (int i = 0; i < maxswap; i++)
|
||||
maxsendlist[iswap][iwhich] = size;
|
||||
}
|
||||
for (int i = 0; i < maxswap; i++)
|
||||
for (int j = 0; j < nprocmaxtot; j++)
|
||||
maxsendlist[i][j] = size;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -692,24 +697,23 @@ void CommTiledKokkos::grow_swap_send(int i, int n, int /*nold*/)
|
||||
memory->destroy(sendbox_multiold[i]);
|
||||
memory->create(sendbox_multiold[i],n,atom->ntypes+1,6,"comm:sendbox_multiold");
|
||||
|
||||
delete [] maxsendlist[i];
|
||||
maxsendlist[i] = new int[n];
|
||||
|
||||
for (int j = 0; j < n; j++)
|
||||
maxsendlist[i][j] = BUFMIN;
|
||||
|
||||
if (sendlist && !k_sendlist.d_view.data()) {
|
||||
for (int ii = 0; ii < maxswap; ii++) {
|
||||
if (sendlist[ii]) {
|
||||
for (int jj = 0; jj < nprocmax[ii]; jj++)
|
||||
memory->destroy(sendlist[ii][jj]);
|
||||
delete [] sendlist[ii];
|
||||
}
|
||||
}
|
||||
if (sendlist && !k_sendlist.h_view.data()) {
|
||||
delete [] sendlist;
|
||||
delete [] maxsendlist;
|
||||
|
||||
sendlist = nullptr;
|
||||
maxsendlist = nullptr;
|
||||
} else {
|
||||
memoryKK->destroy_kokkos(k_sendlist,sendlist);
|
||||
memory->destroy(maxsendlist);
|
||||
}
|
||||
|
||||
memoryKK->create_kokkos(k_sendlist,sendlist,maxswap,n,BUFMIN,"comm:sendlist");
|
||||
nprocmaxtot = MAX(nprocmaxtot,n);
|
||||
|
||||
memoryKK->create_kokkos(k_sendlist,sendlist,maxswap,nprocmaxtot,BUFMIN,"comm:sendlist");
|
||||
memory->create(maxsendlist,maxswap,nprocmaxtot,"comm:maxsendlist");
|
||||
|
||||
for (int i = 0; i < maxswap; i++)
|
||||
for (int j = 0; j < nprocmaxtot; j++)
|
||||
maxsendlist[i][j] = BUFMIN;
|
||||
}
|
||||
|
||||
@ -64,18 +64,17 @@ class CommTiledKokkos : public CommTiled {
|
||||
template<class DeviceType> void reverse_comm_device();
|
||||
|
||||
protected:
|
||||
int nprocmaxtot;
|
||||
|
||||
DAT::tdual_int_3d k_sendlist;
|
||||
//DAT::tdual_int_scalar k_total_send;
|
||||
DAT::tdual_xfloat_2d k_buf_send,k_buf_recv;
|
||||
//DAT::tdual_int_scalar k_count;
|
||||
|
||||
void grow_send(int, int) override;
|
||||
void grow_recv(int, int flag = 0) override;
|
||||
void grow_send(int, int) override; // reallocate send buffer
|
||||
void grow_recv(int, int flag = 0) override; // free/allocate recv buffer
|
||||
void grow_send_kokkos(int, int, ExecutionSpace space = Host);
|
||||
void grow_recv_kokkos(int, int, ExecutionSpace space = Host);
|
||||
void grow_list(int, int, int) override;
|
||||
void grow_swap_send(int, int, int) override; // grow swap arrays for send and recv
|
||||
void grow_list(int, int, int) override; // reallocate sendlist for one swap/proc
|
||||
void grow_swap_send(int, int, int) override; // grow swap arrays for send and recv
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
@ -75,14 +75,14 @@ void DihedralHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
// reallocate per-atom arrays if necessary
|
||||
|
||||
if (eflag_atom) {
|
||||
if(k_eatom.extent(0) < maxeatom) {
|
||||
if ((int)k_eatom.extent(0) < maxeatom) {
|
||||
memoryKK->destroy_kokkos(k_eatom,eatom);
|
||||
memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom");
|
||||
d_eatom = k_eatom.view<DeviceType>();
|
||||
} else Kokkos::deep_copy(d_eatom,0.0);
|
||||
}
|
||||
if (vflag_atom) {
|
||||
if(k_vatom.extent(0) < maxvatom) {
|
||||
if ((int)k_vatom.extent(0) < maxvatom) {
|
||||
memoryKK->destroy_kokkos(k_vatom,vatom);
|
||||
memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"dihedral:vatom");
|
||||
d_vatom = k_vatom.view<DeviceType>();
|
||||
|
||||
@ -76,7 +76,7 @@ void DihedralHybridKokkos::compute(int eflag, int vflag)
|
||||
|
||||
Kokkos::parallel_for(ndihedrallist_orig,LAMMPS_LAMBDA(int i) {
|
||||
const int m = d_map[d_dihedrallist_orig(i,4)];
|
||||
if (m >= 0) Kokkos::atomic_increment(&d_ndihedrallist[m]);
|
||||
if (m >= 0) Kokkos::atomic_inc(&d_ndihedrallist[m]);
|
||||
});
|
||||
|
||||
k_ndihedrallist.modify_device();
|
||||
@ -87,7 +87,7 @@ void DihedralHybridKokkos::compute(int eflag, int vflag)
|
||||
if (h_ndihedrallist[m] > maxdihedral_all)
|
||||
maxdihedral_all = h_ndihedrallist[m] + EXTRA;
|
||||
|
||||
if (k_dihedrallist.d_view.extent(1) < maxdihedral_all)
|
||||
if ((int)k_dihedrallist.d_view.extent(1) < maxdihedral_all)
|
||||
MemKK::realloc_kokkos(k_dihedrallist, "dihedral_hybrid:dihedrallist", nstyles, maxdihedral_all, 5);
|
||||
auto d_dihedrallist = k_dihedrallist.d_view;
|
||||
|
||||
|
||||
@ -690,7 +690,7 @@ int FixCMAPKokkos<DeviceType>::pack_exchange_kokkos(
|
||||
|
||||
copymode = 1;
|
||||
|
||||
Kokkos::parallel_scan(nsend, KOKKOS_LAMBDA(const int &mysend, int &offset, const bool &final) {
|
||||
Kokkos::parallel_scan(Kokkos::RangePolicy<DeviceType>(0,nsend), KOKKOS_LAMBDA(const int &mysend, int &offset, const bool &final) {
|
||||
|
||||
const int i = d_exchange_sendlist(mysend);
|
||||
|
||||
@ -782,7 +782,7 @@ void FixCMAPKokkos<DeviceType>::unpack_exchange_kokkos(
|
||||
|
||||
copymode = 1;
|
||||
|
||||
Kokkos::parallel_for(nrecv, KOKKOS_LAMBDA(const int &i) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nrecv), KOKKOS_LAMBDA(const int &i) {
|
||||
int index = d_indices(i);
|
||||
if (index > -1) {
|
||||
int m = d_ubuf(d_buf(i)).i;
|
||||
|
||||
@ -39,7 +39,12 @@ enum { CONSTANT, EQUAL, ATOM };
|
||||
|
||||
template<class DeviceType>
|
||||
FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
FixLangevin(lmp, narg, arg),rand_pool(seed + comm->me)
|
||||
FixLangevin(lmp, narg, arg),
|
||||
#ifdef LMP_KOKKOS_DEBUG_RNG
|
||||
rand_pool(seed + comm->me, lmp)
|
||||
#else
|
||||
rand_pool(seed + comm->me)
|
||||
#endif
|
||||
{
|
||||
kokkosable = 1;
|
||||
fuse_integrate_flag = 1;
|
||||
@ -48,43 +53,42 @@ FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **a
|
||||
int ntypes = atomKK->ntypes;
|
||||
|
||||
// allocate per-type arrays for force prefactors
|
||||
delete[] gfactor1;
|
||||
delete[] gfactor2;
|
||||
delete[] ratio;
|
||||
memoryKK->create_kokkos(k_gfactor1,gfactor1,ntypes+1,"langevin:gfactor1");
|
||||
memoryKK->create_kokkos(k_gfactor2,gfactor2,ntypes+1,"langevin:gfactor2");
|
||||
memoryKK->create_kokkos(k_ratio,ratio,ntypes+1,"langevin:ratio");
|
||||
d_gfactor1 = k_gfactor1.template view<DeviceType>();
|
||||
h_gfactor1 = k_gfactor1.template view<LMPHostType>();
|
||||
h_gfactor1 = k_gfactor1.h_view;
|
||||
d_gfactor2 = k_gfactor2.template view<DeviceType>();
|
||||
h_gfactor2 = k_gfactor2.template view<LMPHostType>();
|
||||
h_gfactor2 = k_gfactor2.h_view;
|
||||
d_ratio = k_ratio.template view<DeviceType>();
|
||||
h_ratio = k_ratio.template view<LMPHostType>();
|
||||
h_ratio = k_ratio.h_view;
|
||||
|
||||
// optional args
|
||||
for (int i = 1; i <= ntypes; i++) ratio[i] = 1.0;
|
||||
k_ratio.template modify<LMPHostType>();
|
||||
k_ratio.modify_host();
|
||||
|
||||
if (gjfflag) {
|
||||
memory->destroy(franprev);
|
||||
memory->destroy(lv);
|
||||
grow_arrays(atomKK->nmax);
|
||||
atom->add_callback(Atom::GROW);
|
||||
|
||||
// initialize franprev to zero
|
||||
for (int i = 0; i < atomKK->nlocal; i++) {
|
||||
franprev[i][0] = 0.0;
|
||||
franprev[i][1] = 0.0;
|
||||
franprev[i][2] = 0.0;
|
||||
lv[i][0] = 0.0;
|
||||
lv[i][1] = 0.0;
|
||||
lv[i][2] = 0.0;
|
||||
}
|
||||
k_franprev.template modify<LMPHostType>();
|
||||
k_lv.template modify<LMPHostType>();
|
||||
|
||||
Kokkos::deep_copy(d_franprev,0.0);
|
||||
Kokkos::deep_copy(d_lv,0.0);
|
||||
}
|
||||
|
||||
if (zeroflag) {
|
||||
k_fsumall = tdual_double_1d_3n("langevin:fsumall");
|
||||
h_fsumall = k_fsumall.template view<LMPHostType>();
|
||||
h_fsumall = k_fsumall.h_view;
|
||||
d_fsumall = k_fsumall.template view<DeviceType>();
|
||||
}
|
||||
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
datamask_read = V_MASK | F_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK;
|
||||
datamask_read = V_MASK | F_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK;
|
||||
datamask_modify = F_MASK;
|
||||
}
|
||||
|
||||
@ -93,13 +97,21 @@ FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **a
|
||||
template<class DeviceType>
|
||||
FixLangevinKokkos<DeviceType>::~FixLangevinKokkos()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
memoryKK->destroy_kokkos(k_gfactor1,gfactor1);
|
||||
memoryKK->destroy_kokkos(k_gfactor2,gfactor2);
|
||||
memoryKK->destroy_kokkos(k_ratio,ratio);
|
||||
memoryKK->destroy_kokkos(k_flangevin,flangevin);
|
||||
if (gjfflag) memoryKK->destroy_kokkos(k_franprev,franprev);
|
||||
if (gjfflag) memoryKK->destroy_kokkos(k_lv,lv);
|
||||
if (gjfflag) {
|
||||
memoryKK->destroy_kokkos(k_franprev,franprev);
|
||||
memoryKK->destroy_kokkos(k_lv,lv);
|
||||
}
|
||||
memoryKK->destroy_kokkos(k_tforce,tforce);
|
||||
|
||||
#ifdef LMP_KOKKOS_DEBUG_RNG
|
||||
rand_pool.destroy();
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -118,8 +130,170 @@ void FixLangevinKokkos<DeviceType>::init()
|
||||
error->warning(FLERR,"Fix langevin gjf + kokkos is not implemented with random gaussians");
|
||||
|
||||
// prefactors are modified in the init
|
||||
k_gfactor1.template modify<LMPHostType>();
|
||||
k_gfactor2.template modify<LMPHostType>();
|
||||
k_gfactor1.modify_host();
|
||||
k_gfactor2.modify_host();
|
||||
|
||||
#ifdef LMP_KOKKOS_DEBUG_RNG
|
||||
rand_pool.init(random,seed + comm->me);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixLangevinKokkos<DeviceType>::setup(int vflag)
|
||||
{
|
||||
if (gjfflag) {
|
||||
double dt = update->dt;
|
||||
double ftm2v = force->ftm2v;
|
||||
auto v = atomKK->k_v.view<DeviceType>();
|
||||
auto f = atomKK->k_f.view<DeviceType>();
|
||||
auto mask = atomKK->k_mask.view<DeviceType>();
|
||||
int nlocal = atom->nlocal;
|
||||
auto rmass = atomKK->k_rmass.view<DeviceType>();
|
||||
auto mass = atomKK->k_mass.view<DeviceType>();
|
||||
auto type = atomKK->k_type.view<DeviceType>();
|
||||
auto groupbit = this->groupbit;
|
||||
auto gjfa = this->gjfa;
|
||||
auto gjfsib = this->gjfsib;
|
||||
|
||||
if (atom->rmass) {
|
||||
atomKK->sync(execution_space,V_MASK|F_MASK|MASK_MASK|RMASS_MASK);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int &i) {
|
||||
if (mask[i] & groupbit) {
|
||||
const double dtfm = ftm2v * 0.5 * dt / rmass[i];
|
||||
v(i,0) -= dtfm * f(i,0);
|
||||
v(i,1) -= dtfm * f(i,1);
|
||||
v(i,2) -= dtfm * f(i,2);
|
||||
}
|
||||
});
|
||||
|
||||
if (tbiasflag) {
|
||||
// account for bias velocity
|
||||
if (temperature->kokkosable) {
|
||||
temperature->compute_scalar();
|
||||
temperature->remove_bias_all_kk();
|
||||
} else {
|
||||
atomKK->sync(temperature->execution_space,temperature->datamask_read);
|
||||
temperature->compute_scalar();
|
||||
temperature->remove_bias_all();
|
||||
atomKK->modified(temperature->execution_space,temperature->datamask_modify);
|
||||
atomKK->sync(execution_space,temperature->datamask_modify);
|
||||
}
|
||||
}
|
||||
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int &i) {
|
||||
if (mask[i] & groupbit) {
|
||||
v(i,0) /= gjfa * gjfsib * gjfsib;
|
||||
v(i,1) /= gjfa * gjfsib * gjfsib;
|
||||
v(i,2) /= gjfa * gjfsib * gjfsib;
|
||||
}
|
||||
});
|
||||
|
||||
if (tbiasflag) {
|
||||
if (temperature->kokkosable) temperature->restore_bias_all();
|
||||
else {
|
||||
atomKK->sync(temperature->execution_space,temperature->datamask_read);
|
||||
temperature->restore_bias_all();
|
||||
atomKK->modified(temperature->execution_space,temperature->datamask_modify);
|
||||
atomKK->sync(execution_space,temperature->datamask_modify);
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
atomKK->sync(execution_space,V_MASK|F_MASK|MASK_MASK|TYPE_MASK);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int &i) {
|
||||
if (mask[i] & groupbit) {
|
||||
const double dtfm = ftm2v * 0.5 * dt / mass[type[i]];
|
||||
v(i,0) -= dtfm * f(i,0);
|
||||
v(i,1) -= dtfm * f(i,1);
|
||||
v(i,2) -= dtfm * f(i,2);
|
||||
}
|
||||
});
|
||||
|
||||
if (tbiasflag) {
|
||||
// account for bias velocity
|
||||
if (temperature->kokkosable) {
|
||||
temperature->compute_scalar();
|
||||
temperature->remove_bias_all_kk();
|
||||
} else {
|
||||
atomKK->sync(temperature->execution_space,temperature->datamask_read);
|
||||
temperature->compute_scalar();
|
||||
temperature->remove_bias_all();
|
||||
atomKK->modified(temperature->execution_space,temperature->datamask_modify);
|
||||
atomKK->sync(execution_space,temperature->datamask_modify);
|
||||
}
|
||||
}
|
||||
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int &i) {
|
||||
if (mask[i] & groupbit) {
|
||||
v(i,0) /= gjfa * gjfsib * gjfsib;
|
||||
v(i,1) /= gjfa * gjfsib * gjfsib;
|
||||
v(i,2) /= gjfa * gjfsib * gjfsib;
|
||||
}
|
||||
});
|
||||
|
||||
if (tbiasflag) {
|
||||
if (temperature->kokkosable) temperature->restore_bias_all();
|
||||
else {
|
||||
atomKK->sync(temperature->execution_space,temperature->datamask_read);
|
||||
temperature->restore_bias_all();
|
||||
atomKK->modified(temperature->execution_space,temperature->datamask_modify);
|
||||
atomKK->sync(execution_space,temperature->datamask_modify);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
atomKK->modified(execution_space,V_MASK);
|
||||
}
|
||||
|
||||
post_force(vflag);
|
||||
|
||||
if (gjfflag) {
|
||||
double dt = update->dt;
|
||||
double ftm2v = force->ftm2v;
|
||||
auto f = atomKK->k_f.view<DeviceType>();
|
||||
auto v = atomKK->k_v.view<DeviceType>();
|
||||
auto mask = atomKK->k_mask.view<DeviceType>();
|
||||
int nlocal = atom->nlocal;
|
||||
auto rmass = atomKK->k_rmass.view<DeviceType>();
|
||||
auto mass = atomKK->k_mass.view<DeviceType>();
|
||||
auto type = atomKK->k_type.view<DeviceType>();
|
||||
auto groupbit = this->groupbit;
|
||||
|
||||
k_lv.template sync<DeviceType>();
|
||||
auto l_lv = d_lv;
|
||||
|
||||
if (atom->rmass) {
|
||||
atomKK->sync(execution_space,V_MASK|F_MASK|MASK_MASK|RMASS_MASK);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int &i) {
|
||||
if (mask[i] & groupbit) {
|
||||
const double dtfm = ftm2v * 0.5 * dt / rmass[i];
|
||||
v(i,0) += dtfm * f(i,0);
|
||||
v(i,1) += dtfm * f(i,1);
|
||||
v(i,2) += dtfm * f(i,2);
|
||||
l_lv(i,0) = v(i,0);
|
||||
l_lv(i,1) = v(i,1);
|
||||
l_lv(i,2) = v(i,2);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
atomKK->sync(execution_space,V_MASK|F_MASK|MASK_MASK|TYPE_MASK);
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int &i) {
|
||||
if (mask[i] & groupbit) {
|
||||
const double dtfm = ftm2v * 0.5 * dt / mass[type[i]];
|
||||
v(i,0) += dtfm * f(i,0);
|
||||
v(i,1) += dtfm * f(i,1);
|
||||
v(i,2) += dtfm * f(i,2);
|
||||
l_lv(i,0) = v(i,0);
|
||||
l_lv(i,1) = v(i,1);
|
||||
l_lv(i,2) = v(i,2);
|
||||
}
|
||||
});
|
||||
}
|
||||
atomKK->modified(execution_space,V_MASK);
|
||||
k_lv.template modify<DeviceType>();
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -129,10 +303,10 @@ void FixLangevinKokkos<DeviceType>::grow_arrays(int nmax)
|
||||
{
|
||||
memoryKK->grow_kokkos(k_franprev,franprev,nmax,3,"langevin:franprev");
|
||||
d_franprev = k_franprev.template view<DeviceType>();
|
||||
h_franprev = k_franprev.template view<LMPHostType>();
|
||||
h_franprev = k_franprev.h_view;
|
||||
memoryKK->grow_kokkos(k_lv,lv,nmax,3,"langevin:lv");
|
||||
d_lv = k_lv.template view<DeviceType>();
|
||||
h_lv = k_lv.template view<LMPHostType>();
|
||||
h_lv = k_lv.h_view;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -141,7 +315,6 @@ template<class DeviceType>
|
||||
void FixLangevinKokkos<DeviceType>::initial_integrate(int /*vflag*/)
|
||||
{
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
atomKK->modified(execution_space,datamask_modify);
|
||||
|
||||
v = atomKK->k_v.view<DeviceType>();
|
||||
f = atomKK->k_f.view<DeviceType>();
|
||||
@ -150,6 +323,8 @@ void FixLangevinKokkos<DeviceType>::initial_integrate(int /*vflag*/)
|
||||
|
||||
FixLangevinKokkosInitialIntegrateFunctor<DeviceType> functor(this);
|
||||
Kokkos::parallel_for(nlocal,functor);
|
||||
|
||||
atomKK->modified(execution_space,datamask_modify);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
@ -184,6 +359,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int /*vflag*/)
|
||||
rmass = atomKK->k_rmass.view<DeviceType>();
|
||||
f = atomKK->k_f.template view<DeviceType>();
|
||||
v = atomKK->k_v.template view<DeviceType>();
|
||||
mass = atomKK->k_mass.template view<DeviceType>();
|
||||
type = atomKK->k_type.template view<DeviceType>();
|
||||
mask = atomKK->k_mask.template view<DeviceType>();
|
||||
|
||||
@ -197,7 +373,8 @@ void FixLangevinKokkos<DeviceType>::post_force(int /*vflag*/)
|
||||
dt = update->dt;
|
||||
mvv2e = force->mvv2e;
|
||||
ftm2v = force->ftm2v;
|
||||
fran_prop_const = sqrt(24.0*boltz/t_period/dt/mvv2e);
|
||||
fran_prop_const = sqrt(2.0*boltz/t_period/dt/mvv2e);
|
||||
fran_prop_const_gjf = sqrt(24.0*boltz/t_period/dt/mvv2e);
|
||||
|
||||
compute_target(); // modifies tforce vector, hence sync here
|
||||
k_tforce.template sync<DeviceType>();
|
||||
@ -220,7 +397,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int /*vflag*/)
|
||||
maxatom1 = atomKK->nmax;
|
||||
memoryKK->create_kokkos(k_flangevin,flangevin,maxatom1,3,"langevin:flangevin");
|
||||
d_flangevin = k_flangevin.template view<DeviceType>();
|
||||
h_flangevin = k_flangevin.template view<LMPHostType>();
|
||||
h_flangevin = k_flangevin.h_view;
|
||||
}
|
||||
}
|
||||
|
||||
@ -550,7 +727,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int /*vflag*/)
|
||||
h_fsumall(0) = fsumall[0]/count;
|
||||
h_fsumall(1) = fsumall[1]/count;
|
||||
h_fsumall(2) = fsumall[2]/count;
|
||||
k_fsumall.template modify<LMPHostType>();
|
||||
k_fsumall.modify_host();
|
||||
k_fsumall.template sync<DeviceType>();
|
||||
// set total force zero in parallel on the device
|
||||
FixLangevinKokkosZeroForceFunctor<DeviceType> zero_functor(this);
|
||||
@ -581,20 +758,30 @@ FSUM FixLangevinKokkos<DeviceType>::post_force_item(int i) const
|
||||
|
||||
if (mask[i] & groupbit) {
|
||||
rand_type rand_gen = rand_pool.get_state();
|
||||
|
||||
if (Tp_TSTYLEATOM) tsqrt_t = sqrt(d_tforce[i]);
|
||||
if (Tp_RMASS) {
|
||||
gamma1 = -rmass[i] / t_period / ftm2v;
|
||||
gamma2 = sqrt(rmass[i]) * fran_prop_const / ftm2v;
|
||||
gamma1 *= 1.0/d_ratio[type[i]];
|
||||
if (Tp_GJF)
|
||||
gamma2 = sqrt(rmass[i]) * fran_prop_const_gjf / ftm2v;
|
||||
else
|
||||
gamma2 = sqrt(rmass[i]) * fran_prop_const / ftm2v;
|
||||
gamma1 *= 1.0/ratio[type[i]];
|
||||
gamma2 *= 1.0/sqrt(d_ratio[type[i]]) * tsqrt_t;
|
||||
} else {
|
||||
gamma1 = d_gfactor1[type[i]];
|
||||
gamma2 = d_gfactor2[type[i]] * tsqrt_t;
|
||||
}
|
||||
|
||||
fran[0] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
|
||||
fran[1] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
|
||||
fran[2] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
|
||||
if (Tp_GJF) {
|
||||
fran[0] = gamma2 * rand_gen.normal(); //random->gaussian()
|
||||
fran[1] = gamma2 * rand_gen.normal(); //random->gaussian()
|
||||
fran[2] = gamma2 * rand_gen.normal(); //random->gaussian()
|
||||
} else {
|
||||
fran[0] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
|
||||
fran[1] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
|
||||
fran[2] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
|
||||
}
|
||||
|
||||
if (Tp_BIAS) {
|
||||
fdrag[0] = gamma1*v(i,0);
|
||||
@ -678,7 +865,6 @@ void FixLangevinKokkos<DeviceType>::zero_force_item(int i) const
|
||||
f(i,1) -= d_fsumall[1];
|
||||
f(i,2) -= d_fsumall[2];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -740,7 +926,7 @@ void FixLangevinKokkos<DeviceType>::reset_dt()
|
||||
force->ftm2v;
|
||||
h_gfactor2[i] *= 1.0/sqrt(h_ratio[i]);
|
||||
}
|
||||
k_gfactor2.template modify<LMPHostType>();
|
||||
k_gfactor2.modify_host();
|
||||
}
|
||||
|
||||
}
|
||||
@ -781,9 +967,15 @@ KOKKOS_INLINE_FUNCTION
|
||||
// Per-atom contribution to the tallied energy: the dot product of the stored
// Langevin force d_flangevin(i,:) with a velocity.  With gjfflag set the
// velocity is taken from d_lv, otherwise from v.  Atoms outside the fix
// group contribute 0.
double FixLangevinKokkos<DeviceType>::compute_energy_item(int i) const
{
  double my_energy = 0.0;
  if (mask[i] & groupbit) {
    if (gjfflag) {
      my_energy = d_flangevin(i,0)*d_lv(i,0) + d_flangevin(i,1)*d_lv(i,1) +
        d_flangevin(i,2)*d_lv(i,2);
    } else {
      my_energy = d_flangevin(i,0)*v(i,0) + d_flangevin(i,1)*v(i,1) +
        d_flangevin(i,2)*v(i,2);
    }
  }
  return my_energy;
}
|
||||
|
||||
@ -796,30 +988,42 @@ void FixLangevinKokkos<DeviceType>::end_of_step()
|
||||
{
|
||||
if (!tallyflag && !gjfflag) return;
|
||||
|
||||
dt = update->dt;
|
||||
ftm2v = force->ftm2v;
|
||||
v = atomKK->k_v.template view<DeviceType>();
|
||||
f = atomKK->k_f.template view<DeviceType>();
|
||||
rmass = atomKK->k_rmass.template view<DeviceType>();
|
||||
mass = atomKK->k_mass.template view<DeviceType>();
|
||||
mask = atomKK->k_mask.template view<DeviceType>();
|
||||
|
||||
atomKK->sync(execution_space,V_MASK | MASK_MASK);
|
||||
int nlocal = atomKK->nlocal;
|
||||
|
||||
energy_onestep = 0.0;
|
||||
|
||||
atomKK->sync(execution_space,V_MASK | MASK_MASK);
|
||||
if (gjfflag) k_lv.template sync<DeviceType>();
|
||||
k_flangevin.template sync<DeviceType>();
|
||||
FixLangevinKokkosTallyEnergyFunctor<DeviceType> tally_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,tally_functor,energy_onestep);
|
||||
|
||||
if (tallyflag) {
|
||||
FixLangevinKokkosTallyEnergyFunctor<DeviceType> tally_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,tally_functor,energy_onestep);
|
||||
}
|
||||
|
||||
if (gjfflag) {
|
||||
if (rmass.data()) {
|
||||
atomKK->sync(execution_space,RMASS_MASK);
|
||||
FixLangevinKokkosEndOfStepFunctor<DeviceType,1> functor(this);
|
||||
Kokkos::parallel_for(nlocal,functor);
|
||||
} else {
|
||||
atomKK->sync(execution_space,TYPE_MASK);
|
||||
type = atomKK->k_type.template view<DeviceType>();
|
||||
mass = atomKK->k_mass.view<DeviceType>();
|
||||
FixLangevinKokkosEndOfStepFunctor<DeviceType,0> functor(this);
|
||||
Kokkos::parallel_for(nlocal,functor);
|
||||
}
|
||||
}
|
||||
|
||||
atomKK->modified(execution_space,V_MASK);
|
||||
k_lv.template modify<DeviceType>();
|
||||
|
||||
energy += energy_onestep*update->dt;
|
||||
}
|
||||
|
||||
@ -828,7 +1032,7 @@ KOKKOS_INLINE_FUNCTION
|
||||
void FixLangevinKokkos<DeviceType>::end_of_step_item(int i) const {
|
||||
double tmp[3];
|
||||
if (mask[i] & groupbit) {
|
||||
const double dtfm = force->ftm2v * 0.5 * dt / mass[type[i]];
|
||||
const double dtfm = ftm2v * 0.5 * dt / mass[type[i]];
|
||||
tmp[0] = v(i,0);
|
||||
tmp[1] = v(i,1);
|
||||
tmp[2] = v(i,2);
|
||||
@ -841,10 +1045,10 @@ void FixLangevinKokkos<DeviceType>::end_of_step_item(int i) const {
|
||||
dtfm * 0.5 * (gjfsib * d_flangevin(i,0) - d_franprev(i,0)) +
|
||||
(gjfsib * gjfa * 0.5 + dt * 0.25 / t_period / gjfsib) * d_lv(i,0);
|
||||
v(i,1) = 0.5 * gjfsib * gjfsib * (v(i,1) + dtfm * f(i,1) / gjfa) +
|
||||
dtfm * 0.5 * (gjfsib * d_flangevin(i,0) - d_franprev(i,1)) +
|
||||
dtfm * 0.5 * (gjfsib * d_flangevin(i,1) - d_franprev(i,1)) +
|
||||
(gjfsib * gjfa * 0.5 + dt * 0.25 / t_period / gjfsib) * d_lv(i,1);
|
||||
v(i,2) = 0.5 * gjfsib * gjfsib * (v(i,2) + dtfm * f(i,2) / gjfa) +
|
||||
dtfm * 0.5 * (gjfsib * d_flangevin(i,0) - d_franprev(i,2)) +
|
||||
dtfm * 0.5 * (gjfsib * d_flangevin(i,2) - d_franprev(i,2)) +
|
||||
(gjfsib * gjfa * 0.5 + dt * 0.25 / t_period / gjfsib) * d_lv(i,2);
|
||||
}
|
||||
d_lv(i,0) = tmp[0];
|
||||
@ -859,7 +1063,7 @@ void FixLangevinKokkos<DeviceType>::end_of_step_rmass_item(int i) const
|
||||
{
|
||||
double tmp[3];
|
||||
if (mask[i] & groupbit) {
|
||||
const double dtfm = force->ftm2v * 0.5 * dt / rmass[i];
|
||||
const double dtfm = ftm2v * 0.5 * dt / rmass[i];
|
||||
tmp[0] = v(i,0);
|
||||
tmp[1] = v(i,1);
|
||||
tmp[2] = v(i,2);
|
||||
@ -891,6 +1095,9 @@ void FixLangevinKokkos<DeviceType>::end_of_step_rmass_item(int i) const
|
||||
template<class DeviceType>
|
||||
void FixLangevinKokkos<DeviceType>::copy_arrays(int i, int j, int /*delflag*/)
|
||||
{
|
||||
k_franprev.sync_host();
|
||||
k_lv.sync_host();
|
||||
|
||||
h_franprev(j,0) = h_franprev(i,0);
|
||||
h_franprev(j,1) = h_franprev(i,1);
|
||||
h_franprev(j,2) = h_franprev(i,2);
|
||||
@ -898,8 +1105,8 @@ void FixLangevinKokkos<DeviceType>::copy_arrays(int i, int j, int /*delflag*/)
|
||||
h_lv(j,1) = h_lv(i,1);
|
||||
h_lv(j,2) = h_lv(i,2);
|
||||
|
||||
k_franprev.template modify<LMPHostType>();
|
||||
k_lv.template modify<LMPHostType>();
|
||||
k_franprev.modify_host();
|
||||
k_lv.modify_host();
|
||||
|
||||
}
|
||||
|
||||
@ -924,24 +1131,6 @@ void FixLangevinKokkos<DeviceType>::sort_kokkos(Kokkos::BinSort<KeyViewType, Bin
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixLangevinKokkos<DeviceType>::cleanup_copy()
|
||||
{
|
||||
random = nullptr;
|
||||
tstr = nullptr;
|
||||
gfactor1 = nullptr;
|
||||
gfactor2 = nullptr;
|
||||
ratio = nullptr;
|
||||
id_temp = nullptr;
|
||||
flangevin = nullptr;
|
||||
tforce = nullptr;
|
||||
gjfflag = 0;
|
||||
franprev = nullptr;
|
||||
lv = nullptr;
|
||||
id = style = nullptr;
|
||||
vatom = nullptr;
|
||||
}
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class FixLangevinKokkos<LMPDeviceType>;
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
|
||||
@ -27,6 +27,7 @@ FixStyle(langevin/kk/host,FixLangevinKokkos<LMPHostType>);
|
||||
#include "kokkos_type.h"
|
||||
#include "kokkos_base.h"
|
||||
#include "Kokkos_Random.hpp"
|
||||
#include "rand_pool_wrap_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
@ -66,8 +67,8 @@ namespace LAMMPS_NS {
|
||||
FixLangevinKokkos(class LAMMPS *, int, char **);
|
||||
~FixLangevinKokkos() override;
|
||||
|
||||
void cleanup_copy();
|
||||
void init() override;
|
||||
void setup(int) override;
|
||||
void initial_integrate(int) override;
|
||||
void fused_integrate(int) override;
|
||||
void post_force(int) override;
|
||||
@ -135,13 +136,21 @@ namespace LAMMPS_NS {
|
||||
typename tdual_double_1d_3n::t_dev d_fsumall;
|
||||
typename tdual_double_1d_3n::t_host h_fsumall;
|
||||
|
||||
double boltz,dt,mvv2e,ftm2v,fran_prop_const;
|
||||
double boltz,dt,mvv2e,ftm2v,fran_prop_const,fran_prop_const_gjf;
|
||||
|
||||
void compute_target();
|
||||
|
||||
#ifndef LMP_KOKKOS_DEBUG_RNG
|
||||
Kokkos::Random_XorShift64_Pool<DeviceType> rand_pool;
|
||||
typedef typename Kokkos::Random_XorShift64_Pool<DeviceType>::generator_type rand_type;
|
||||
|
||||
//Kokkos::Random_XorShift1024_Pool<DeviceType> rand_pool;
|
||||
//typedef typename Kokkos::Random_XorShift1024_Pool<DeviceType>::generator_type rand_type;
|
||||
#else
|
||||
RandPoolWrap rand_pool;
|
||||
typedef RandWrap rand_type;
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
template <class DeviceType>
|
||||
@ -150,7 +159,7 @@ namespace LAMMPS_NS {
|
||||
FixLangevinKokkos<DeviceType> c;
|
||||
|
||||
FixLangevinKokkosInitialIntegrateFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
|
||||
c(*c_ptr) {c.cleanup_copy();};
|
||||
c(*c_ptr) {c.set_copymode(1);};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
@ -168,7 +177,7 @@ namespace LAMMPS_NS {
|
||||
|
||||
FixLangevinKokkosPostForceFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
|
||||
c(*c_ptr) {}
|
||||
~FixLangevinKokkosPostForceFunctor() {c.cleanup_copy();}
|
||||
~FixLangevinKokkosPostForceFunctor() {c.set_copymode(1);}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
@ -204,7 +213,7 @@ namespace LAMMPS_NS {
|
||||
FixLangevinKokkos<DeviceType> c;
|
||||
|
||||
FixLangevinKokkosZeroForceFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
|
||||
c(*c_ptr) {c.cleanup_copy();}
|
||||
c(*c_ptr) {c.set_copymode(1);}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
@ -218,7 +227,7 @@ namespace LAMMPS_NS {
|
||||
FixLangevinKokkos<DeviceType> c;
|
||||
typedef double value_type;
|
||||
FixLangevinKokkosTallyEnergyFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
|
||||
c(*c_ptr) {c.cleanup_copy();}
|
||||
c(*c_ptr) {c.set_copymode(1);}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i, value_type &energy) const {
|
||||
@ -241,7 +250,7 @@ namespace LAMMPS_NS {
|
||||
FixLangevinKokkos<DeviceType> c;
|
||||
|
||||
FixLangevinKokkosEndOfStepFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
|
||||
c(*c_ptr) {c.cleanup_copy();}
|
||||
c(*c_ptr) {c.set_copymode(1);}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
|
||||
@ -36,7 +36,7 @@ FixMomentumKokkos<DeviceType>::FixMomentumKokkos(LAMMPS *lmp, int narg, char **a
|
||||
{
|
||||
kokkosable = 1;
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
groupKK = (GroupKokkos<DeviceType> *)group;
|
||||
groupKK = (GroupKokkos *)group;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
datamask_read = EMPTY_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
@ -94,7 +94,7 @@ void FixMomentumKokkos<DeviceType>::end_of_step()
|
||||
double ekin_old,ekin_new;
|
||||
ekin_old = ekin_new = 0.0;
|
||||
|
||||
if (dynamic) masstotal = groupKK->mass(igroup);
|
||||
if (dynamic) masstotal = groupKK->mass_kk<DeviceType>(igroup);
|
||||
|
||||
// do nothing if group is empty, i.e. mass is zero;
|
||||
|
||||
@ -109,7 +109,7 @@ void FixMomentumKokkos<DeviceType>::end_of_step()
|
||||
auto groupbit2 = groupbit;
|
||||
if (linear) {
|
||||
double vcm[3];
|
||||
groupKK->vcm(igroup,masstotal,vcm);
|
||||
groupKK->vcm_kk<DeviceType>(igroup,masstotal,vcm);
|
||||
|
||||
// adjust velocities by vcm to zero linear momentum
|
||||
// only adjust a component if flag is set
|
||||
@ -131,9 +131,9 @@ void FixMomentumKokkos<DeviceType>::end_of_step()
|
||||
|
||||
if (angular) {
|
||||
double xcm[3],angmom[3],omega[3],inertia[3][3];
|
||||
groupKK->xcm(igroup,masstotal,xcm);
|
||||
groupKK->angmom(igroup,xcm,angmom);
|
||||
groupKK->inertia(igroup,xcm,inertia);
|
||||
groupKK->xcm_kk<DeviceType>(igroup,masstotal,xcm);
|
||||
groupKK->angmom_kk<DeviceType>(igroup,xcm,angmom);
|
||||
groupKK->inertia_kk<DeviceType>(igroup,xcm,inertia);
|
||||
group->omega(angmom,inertia,omega);
|
||||
|
||||
// adjust velocities to zero omega
|
||||
|
||||
@ -38,7 +38,7 @@ class FixMomentumKokkos : public FixMomentum {
|
||||
FixMomentumKokkos(class LAMMPS *, int, char **);
|
||||
void end_of_step() override;
|
||||
private:
|
||||
GroupKokkos<DeviceType> *groupKK;
|
||||
GroupKokkos *groupKK;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@ -66,7 +66,7 @@ void FixNVELimitKokkos<DeviceType>::initial_integrate(int /*vflag*/)
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space, X_MASK|V_MASK|F_MASK|MASK_MASK|RMASS_MASK );
|
||||
|
||||
Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) {
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) {
|
||||
if (d_mask[i] & l_groupbit) {
|
||||
const double dtfm = l_dtf / d_rmass[i];
|
||||
d_v(i,0) += dtfm * d_f(i,0);
|
||||
@ -95,7 +95,7 @@ void FixNVELimitKokkos<DeviceType>::initial_integrate(int /*vflag*/)
|
||||
auto l_groupbit = groupbit;
|
||||
atomKK->sync(execution_space, X_MASK|V_MASK|F_MASK|MASK_MASK|TYPE_MASK );
|
||||
|
||||
Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) {
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) {
|
||||
if (d_mask[i] & l_groupbit) {
|
||||
const double dtfm = l_dtf / d_mass[d_type[i]];
|
||||
d_v(i,0) += dtfm * d_f(i,0);
|
||||
@ -144,7 +144,7 @@ void FixNVELimitKokkos<DeviceType>::final_integrate()
|
||||
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
|
||||
atomKK->sync(execution_space, V_MASK|F_MASK|MASK_MASK|RMASS_MASK );
|
||||
|
||||
Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) {
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) {
|
||||
if (d_mask[i] & l_groupbit) {
|
||||
const double dtfm = l_dtf / d_rmass[i];
|
||||
d_v(i,0) += dtfm * d_f(i,0);
|
||||
@ -168,7 +168,7 @@ void FixNVELimitKokkos<DeviceType>::final_integrate()
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space, V_MASK|F_MASK|MASK_MASK|TYPE_MASK );
|
||||
|
||||
Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(const int i, int &l_ncount) {
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), KOKKOS_LAMBDA(const int i, int &l_ncount) {
|
||||
if (d_mask[i] & l_groupbit) {
|
||||
const double dtfm = l_dtf / d_mass[d_type[i]];
|
||||
d_v(i,0) += dtfm * d_f(i,0);
|
||||
|
||||
@ -38,7 +38,7 @@ FixRecenterKokkos<DeviceType>::FixRecenterKokkos(LAMMPS *lmp, int narg, char **a
|
||||
{
|
||||
kokkosable = 1;
|
||||
atomKK = (AtomKokkos *)atom;
|
||||
groupKK = (GroupKokkos<DeviceType> *)group;
|
||||
groupKK = (GroupKokkos *)group;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
datamask_read = X_MASK | MASK_MASK;
|
||||
@ -87,9 +87,10 @@ void FixRecenterKokkos<DeviceType>::initial_integrate(int /*vflag*/)
|
||||
|
||||
// current COM
|
||||
|
||||
if (group->dynamic[igroup]) masstotal = groupKK->mass(igroup);
|
||||
|
||||
if (group->dynamic[igroup]) masstotal = groupKK->mass_kk<DeviceType>(igroup);
|
||||
double xcm[3];
|
||||
groupKK->xcm(igroup,masstotal,xcm);
|
||||
groupKK->xcm_kk<DeviceType>(igroup,masstotal,xcm);
|
||||
|
||||
// shift coords by difference between actual COM and requested COM
|
||||
|
||||
|
||||
@ -36,7 +36,7 @@ class FixRecenterKokkos : public FixRecenter {
|
||||
FixRecenterKokkos(class LAMMPS *, int, char **);
|
||||
void initial_integrate(int) override;
|
||||
private:
|
||||
GroupKokkos<DeviceType> *groupKK;
|
||||
GroupKokkos *groupKK;
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
@ -1859,7 +1859,7 @@ void FixRxKokkos<DeviceType>::computeLocalTemperature()
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
#if 0
|
||||
Kokkos::parallel_for ( inum,
|
||||
Kokkos::parallel_for ( Kokkos::RangePolicy<DeviceType>(0,inum),
|
||||
LAMMPS_LAMBDA(const int ii)
|
||||
{
|
||||
// Create an atomic view of sumWeights and dpdThetaLocal. Only needed
|
||||
@ -1939,7 +1939,7 @@ void FixRxKokkos<DeviceType>::computeLocalTemperature()
|
||||
|
||||
// self-interaction for local temperature
|
||||
#if 0
|
||||
Kokkos::parallel_for ( nlocal,
|
||||
Kokkos::parallel_for ( Kokkos::RangePolicy<DeviceType>(0,nlocal),
|
||||
LAMMPS_LAMBDA(const int i)
|
||||
{
|
||||
double wij = 0.0;
|
||||
|
||||
@ -283,22 +283,22 @@ void FixShardlowKokkos<DeviceType>::ssa_update_dpd(
|
||||
const X_FLOAT delz = ztmp - x(j, 2);
|
||||
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(0, 0)));
|
||||
else Kokkos::atomic_increment(&(d_counters(0, 1)));
|
||||
Kokkos::atomic_increment(&(d_counters(0, 2)));
|
||||
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_inc(&(d_counters(0, 0)));
|
||||
else Kokkos::atomic_inc(&(d_counters(0, 1)));
|
||||
Kokkos::atomic_inc(&(d_counters(0, 2)));
|
||||
int rsqi = rsq / 8;
|
||||
if (rsqi < 0) rsqi = 0;
|
||||
else if (rsqi > 31) rsqi = 31;
|
||||
Kokkos::atomic_increment(&(d_hist(rsqi)));
|
||||
Kokkos::atomic_inc(&(d_hist(rsqi)));
|
||||
#endif
|
||||
|
||||
// NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test
|
||||
if ((rsq < (STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype)))
|
||||
&& (rsq >= EPSILON_SQUARED)) {
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(1, 0)));
|
||||
else Kokkos::atomic_increment(&(d_counters(1, 1)));
|
||||
Kokkos::atomic_increment(&(d_counters(1, 2)));
|
||||
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_inc(&(d_counters(1, 0)));
|
||||
else Kokkos::atomic_inc(&(d_counters(1, 1)));
|
||||
Kokkos::atomic_inc(&(d_counters(1, 2)));
|
||||
#endif
|
||||
double r = sqrt(rsq);
|
||||
double rinv = 1.0/r;
|
||||
@ -428,22 +428,22 @@ void FixShardlowKokkos<DeviceType>::ssa_update_dpde(
|
||||
const X_FLOAT delz = ztmp - x(j, 2);
|
||||
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(0, 0)));
|
||||
else Kokkos::atomic_increment(&(d_counters(0, 1)));
|
||||
Kokkos::atomic_increment(&(d_counters(0, 2)));
|
||||
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_inc(&(d_counters(0, 0)));
|
||||
else Kokkos::atomic_inc(&(d_counters(0, 1)));
|
||||
Kokkos::atomic_inc(&(d_counters(0, 2)));
|
||||
int rsqi = rsq / 8;
|
||||
if (rsqi < 0) rsqi = 0;
|
||||
else if (rsqi > 31) rsqi = 31;
|
||||
Kokkos::atomic_increment(&(d_hist(rsqi)));
|
||||
Kokkos::atomic_inc(&(d_hist(rsqi)));
|
||||
#endif
|
||||
|
||||
// NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test
|
||||
if ((rsq < (STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype)))
|
||||
&& (rsq >= EPSILON_SQUARED)) {
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(1, 0)));
|
||||
else Kokkos::atomic_increment(&(d_counters(1, 1)));
|
||||
Kokkos::atomic_increment(&(d_counters(1, 2)));
|
||||
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_inc(&(d_counters(1, 0)));
|
||||
else Kokkos::atomic_inc(&(d_counters(1, 1)));
|
||||
Kokkos::atomic_inc(&(d_counters(1, 2)));
|
||||
#endif
|
||||
|
||||
double r = sqrt(rsq);
|
||||
|
||||
@ -123,7 +123,7 @@ void FixSpringSelfKokkos<DeviceType>::post_force(int /*vflag*/)
|
||||
auto l_yflag = yflag;
|
||||
auto l_zflag = zflag;
|
||||
|
||||
Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double& espring_kk) {
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal), LAMMPS_LAMBDA(const int& i, double& espring_kk) {
|
||||
if (l_mask[i] & l_groupbit) {
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = l_x(i,0);
|
||||
|
||||
@ -1,363 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Mitch Murphy (alphataubio at gmail)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "group_kokkos.h"
|
||||
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "domain_kokkos.h"
|
||||
#include "kokkos_few.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
GroupKokkos<DeviceType>::GroupKokkos(LAMMPS *lmp) : Group(lmp)
|
||||
{
|
||||
atomKK = (AtomKokkos *)atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// computations on a group of atoms
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute the total mass of group of atoms
|
||||
use either per-type mass or per-atom rmass
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
double GroupKokkos<DeviceType>::mass(int igroup)
|
||||
{
|
||||
int groupbit = bitmask[igroup];
|
||||
auto d_mask = atomKK->k_mask.template view<DeviceType>();
|
||||
double one = 0.0;
|
||||
|
||||
if (atomKK->rmass) {
|
||||
|
||||
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,MASK_MASK|RMASS_MASK);
|
||||
|
||||
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_one) {
|
||||
if (d_mask(i) & groupbit) l_one += d_rmass(i);
|
||||
}, one);
|
||||
|
||||
} else {
|
||||
|
||||
auto d_mass = atomKK->k_mass.template view<DeviceType>();
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,MASK_MASK|TYPE_MASK);
|
||||
atomKK->k_mass.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_one) {
|
||||
if (d_mask(i) & groupbit) l_one += d_mass(d_type(i));
|
||||
}, one);
|
||||
|
||||
}
|
||||
|
||||
double all;
|
||||
MPI_Allreduce(&one, &all, 1, MPI_DOUBLE, MPI_SUM, world);
|
||||
return all;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute the center-of-mass coords of group of atoms
|
||||
masstotal = total mass
|
||||
return center-of-mass coords in cm[]
|
||||
must unwrap atoms to compute center-of-mass correctly
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void GroupKokkos<DeviceType>::xcm(int igroup, double masstotal, double *xcm)
|
||||
{
|
||||
int groupbit = bitmask[igroup];
|
||||
auto d_x = atomKK->k_x.template view<DeviceType>();
|
||||
auto d_mask = atomKK->k_mask.template view<DeviceType>();
|
||||
auto d_image = atomKK->k_image.template view<DeviceType>();
|
||||
auto l_prd = Few<double, 3>(domain->prd);
|
||||
auto l_h = Few<double, 6>(domain->h);
|
||||
auto l_triclinic = domain->triclinic;
|
||||
double cmone[3] = {0.0, 0.0, 0.0};
|
||||
|
||||
if (atomKK->rmass) {
|
||||
|
||||
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
|
||||
|
||||
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_rmass(i);
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = d_x(i,0);
|
||||
x_i[1] = d_x(i,1);
|
||||
x_i[2] = d_x(i,2);
|
||||
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
|
||||
l_cmx += unwrapKK[0] * massone;
|
||||
l_cmy += unwrapKK[1] * massone;
|
||||
l_cmz += unwrapKK[2] * massone;
|
||||
}
|
||||
}, cmone[0], cmone[1], cmone[2]);
|
||||
|
||||
} else {
|
||||
|
||||
auto d_mass = atomKK->k_mass.template view<DeviceType>();
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
|
||||
atomKK->k_mass.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_mass(d_type(i));
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = d_x(i,0);
|
||||
x_i[1] = d_x(i,1);
|
||||
x_i[2] = d_x(i,2);
|
||||
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
|
||||
l_cmx += unwrapKK[0] * massone;
|
||||
l_cmy += unwrapKK[1] * massone;
|
||||
l_cmz += unwrapKK[2] * massone;
|
||||
}
|
||||
}, cmone[0], cmone[1], cmone[2]);
|
||||
|
||||
}
|
||||
|
||||
MPI_Allreduce(cmone, xcm, 3, MPI_DOUBLE, MPI_SUM, world);
|
||||
if (masstotal > 0.0) {
|
||||
xcm[0] /= masstotal;
|
||||
xcm[1] /= masstotal;
|
||||
xcm[2] /= masstotal;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute the center-of-mass velocity of group of atoms
|
||||
masstotal = total mass
|
||||
return center-of-mass velocity in vcm[]
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void GroupKokkos<DeviceType>::vcm(int igroup, double masstotal, double *vcm)
|
||||
{
|
||||
int groupbit = bitmask[igroup];
|
||||
auto d_v = atomKK->k_v.template view<DeviceType>();
|
||||
auto d_mask = atomKK->k_mask.template view<DeviceType>();
|
||||
auto d_image = atomKK->k_image.template view<DeviceType>();
|
||||
double p[3] = {0.0, 0.0, 0.0};
|
||||
|
||||
if (atomKK->rmass) {
|
||||
|
||||
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
|
||||
|
||||
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_rmass(i);
|
||||
l_px += d_v(i,0) * massone;
|
||||
l_py += d_v(i,1) * massone;
|
||||
l_pz += d_v(i,2) * massone;
|
||||
}
|
||||
}, p[0], p[1], p[2]);
|
||||
|
||||
} else {
|
||||
|
||||
auto d_mass = atomKK->k_mass.template view<DeviceType>();
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
|
||||
atomKK->k_mass.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_mass(d_type(i));
|
||||
l_px += d_v(i,0) * massone;
|
||||
l_py += d_v(i,1) * massone;
|
||||
l_pz += d_v(i,2) * massone;
|
||||
}
|
||||
}, p[0], p[1], p[2]);
|
||||
|
||||
}
|
||||
|
||||
MPI_Allreduce(p, vcm, 3, MPI_DOUBLE, MPI_SUM, world);
|
||||
if (masstotal > 0.0) {
|
||||
vcm[0] /= masstotal;
|
||||
vcm[1] /= masstotal;
|
||||
vcm[2] /= masstotal;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute the angular momentum L (lmom) of group
|
||||
around center-of-mass cm
|
||||
must unwrap atoms to compute L correctly
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void GroupKokkos<DeviceType>::angmom(int igroup, double *xcm, double *lmom)
|
||||
{
|
||||
int groupbit = bitmask[igroup];
|
||||
auto d_x = atomKK->k_x.template view<DeviceType>();
|
||||
auto d_v = atomKK->k_v.template view<DeviceType>();
|
||||
auto d_mask = atomKK->k_mask.template view<DeviceType>();
|
||||
auto d_image = atomKK->k_image.template view<DeviceType>();
|
||||
auto l_prd = Few<double, 3>(domain->prd);
|
||||
auto l_h = Few<double, 6>(domain->h);
|
||||
auto l_triclinic = domain->triclinic;
|
||||
auto l_xcm0 = xcm[0];
|
||||
auto l_xcm1 = xcm[1];
|
||||
auto l_xcm2 = xcm[2];
|
||||
double p[3] = {0.0, 0.0, 0.0};
|
||||
|
||||
if (atomKK->rmass) {
|
||||
|
||||
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
|
||||
|
||||
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_rmass(i);
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = d_x(i,0);
|
||||
x_i[1] = d_x(i,1);
|
||||
x_i[2] = d_x(i,2);
|
||||
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
|
||||
double dx = unwrapKK[0] - l_xcm0;
|
||||
double dy = unwrapKK[1] - l_xcm1;
|
||||
double dz = unwrapKK[2] - l_xcm2;
|
||||
l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1));
|
||||
l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2));
|
||||
l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0));
|
||||
}
|
||||
}, p[0], p[1], p[2]);
|
||||
|
||||
} else {
|
||||
|
||||
auto d_mass = atomKK->k_mass.template view<DeviceType>();
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
|
||||
atomKK->k_mass.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_mass(d_type(i));
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = d_x(i,0);
|
||||
x_i[1] = d_x(i,1);
|
||||
x_i[2] = d_x(i,2);
|
||||
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
|
||||
double dx = unwrapKK[0] - l_xcm0;
|
||||
double dy = unwrapKK[1] - l_xcm1;
|
||||
double dz = unwrapKK[2] - l_xcm2;
|
||||
l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1));
|
||||
l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2));
|
||||
l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0));
|
||||
}
|
||||
}, p[0], p[1], p[2]);
|
||||
|
||||
}
|
||||
MPI_Allreduce(p, lmom, 3, MPI_DOUBLE, MPI_SUM, world);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute moment of inertia tensor around center-of-mass xcm of group
|
||||
must unwrap atoms to compute itensor correctly
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void GroupKokkos<DeviceType>::inertia(int igroup, double *xcm, double itensor[3][3])
|
||||
{
|
||||
int groupbit = bitmask[igroup];
|
||||
auto d_x = atomKK->k_x.template view<DeviceType>();
|
||||
auto d_mask = atomKK->k_mask.template view<DeviceType>();
|
||||
auto d_image = atomKK->k_image.template view<DeviceType>();
|
||||
auto l_prd = Few<double, 3>(domain->prd);
|
||||
auto l_h = Few<double, 6>(domain->h);
|
||||
auto l_triclinic = domain->triclinic;
|
||||
auto l_xcm0 = xcm[0];
|
||||
auto l_xcm1 = xcm[1];
|
||||
auto l_xcm2 = xcm[2];
|
||||
|
||||
double ione[3][3];
|
||||
for (int i = 0; i < 3; i++)
|
||||
for (int j = 0; j < 3; j++) ione[i][j] = 0.0;
|
||||
|
||||
if (atomKK->rmass) {
|
||||
|
||||
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
|
||||
|
||||
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_rmass(i);
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = d_x(i,0);
|
||||
x_i[1] = d_x(i,1);
|
||||
x_i[2] = d_x(i,2);
|
||||
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
|
||||
double dx = unwrapKK[0] - l_xcm0;
|
||||
double dy = unwrapKK[1] - l_xcm1;
|
||||
double dz = unwrapKK[2] - l_xcm2;
|
||||
l_i00 += massone * (dy * dy + dz * dz);
|
||||
l_i11 += massone * (dx * dx + dz * dz);
|
||||
l_i22 += massone * (dx * dx + dy * dy);
|
||||
l_i01 -= massone * dx * dy;
|
||||
l_i12 -= massone * dy * dz;
|
||||
l_i02 -= massone * dx * dz;
|
||||
}
|
||||
}, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]);
|
||||
|
||||
} else {
|
||||
|
||||
auto d_mass = atomKK->k_mass.template view<DeviceType>();
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
|
||||
atomKK->k_mass.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(atom->nlocal, KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_mass(d_type(i));
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = d_x(i,0);
|
||||
x_i[1] = d_x(i,1);
|
||||
x_i[2] = d_x(i,2);
|
||||
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
|
||||
double dx = unwrapKK[0] - l_xcm0;
|
||||
double dy = unwrapKK[1] - l_xcm1;
|
||||
double dz = unwrapKK[2] - l_xcm2;
|
||||
l_i00 += massone * (dy * dy + dz * dz);
|
||||
l_i11 += massone * (dx * dx + dz * dz);
|
||||
l_i22 += massone * (dx * dx + dy * dy);
|
||||
l_i01 -= massone * dx * dy;
|
||||
l_i12 -= massone * dy * dz;
|
||||
l_i02 -= massone * dx * dz;
|
||||
}
|
||||
}, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]);
|
||||
|
||||
}
|
||||
|
||||
ione[1][0] = ione[0][1];
|
||||
ione[2][1] = ione[1][2];
|
||||
ione[2][0] = ione[0][2];
|
||||
MPI_Allreduce(&ione[0][0], &itensor[0][0], 9, MPI_DOUBLE, MPI_SUM, world);
|
||||
}
|
||||
|
||||
// Explicit instantiations of GroupKokkos: always for LMPDeviceType, and
// additionally for LMPHostType when LMP_KOKKOS_GPU is defined.
namespace LAMMPS_NS {
|
||||
template class GroupKokkos<LMPDeviceType>;
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
template class GroupKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
||||
@ -15,22 +15,352 @@
|
||||
#define LMP_GROUP_KOKKOS_H
|
||||
|
||||
#include "group.h"
|
||||
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "domain_kokkos.h"
|
||||
#include "kokkos_few.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType>
|
||||
class GroupKokkos : public Group {
|
||||
public:
|
||||
GroupKokkos(class LAMMPS *);
|
||||
double mass(int); // total mass of atoms in group
|
||||
void xcm(int, double, double *); // center-of-mass coords of group
|
||||
void vcm(int, double, double *); // center-of-mass velocity of group
|
||||
void angmom(int, double *, double *); // angular momentum of group
|
||||
void inertia(int, double *, double[3][3]); // inertia tensor
|
||||
GroupKokkos(LAMMPS *lmp) : Group(lmp) { atomKK = (AtomKokkos *)atom; }
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// computations on a group of atoms
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute the total mass of group of atoms
|
||||
use either per-type mass or per-atom rmass
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
double mass_kk(int igroup)
|
||||
{
|
||||
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
int groupbit = bitmask[igroup];
|
||||
auto d_mask = atomKK->k_mask.template view<DeviceType>();
|
||||
double one = 0.0;
|
||||
|
||||
if (atomKK->rmass) {
|
||||
|
||||
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,MASK_MASK|RMASS_MASK);
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_one) {
|
||||
if (d_mask(i) & groupbit) l_one += d_rmass(i);
|
||||
}, one);
|
||||
|
||||
} else {
|
||||
|
||||
auto d_mass = atomKK->k_mass.template view<DeviceType>();
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,MASK_MASK|TYPE_MASK);
|
||||
atomKK->k_mass.template sync<DeviceType>();
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_one) {
|
||||
if (d_mask(i) & groupbit) l_one += d_mass(d_type(i));
|
||||
}, one);
|
||||
|
||||
}
|
||||
|
||||
double all;
|
||||
MPI_Allreduce(&one, &all, 1, MPI_DOUBLE, MPI_SUM, world);
|
||||
return all;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute the center-of-mass coords of group of atoms
|
||||
masstotal = total mass
|
||||
return center-of-mass coords in cm[]
|
||||
must unwrap atoms to compute center-of-mass correctly
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
// Center-of-mass coordinates of the atoms in group `igroup`.
//   igroup    : index into bitmask[] selecting the group
//   masstotal : total mass of the group, used as the divisor
//   xcm       : output, 3 doubles; receives the mass-weighted position sum
//               reduced over `world`, divided by masstotal when masstotal > 0.0
//               (left as the raw reduced sum otherwise)
// Positions are passed through DomainKokkos::unmap() together with the atom's
// image value before being weighted by mass.
template<class DeviceType>
|
||||
void xcm_kk(int igroup, double masstotal, double *xcm)
|
||||
{
|
||||
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
int groupbit = bitmask[igroup];
|
||||
auto d_x = atomKK->k_x.template view<DeviceType>();
|
||||
auto d_mask = atomKK->k_mask.template view<DeviceType>();
|
||||
auto d_image = atomKK->k_image.template view<DeviceType>();
|
||||
// copy box data into local values so the lambdas capture them by value
auto l_prd = Few<double, 3>(domain->prd);
|
||||
auto l_h = Few<double, 6>(domain->h);
|
||||
auto l_triclinic = domain->triclinic;
|
||||
// per-rank mass-weighted position sums (x, y, z)
double cmone[3] = {0.0, 0.0, 0.0};
|
||||
|
||||
if (atomKK->rmass) {
|
||||
|
||||
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
|
||||
|
||||
// three-scalar reduction using per-atom rmass
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_rmass(i);
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = d_x(i,0);
|
||||
x_i[1] = d_x(i,1);
|
||||
x_i[2] = d_x(i,2);
|
||||
// presumably the unwrapped position derived from the image value — unmap() is defined elsewhere
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
|
||||
l_cmx += unwrapKK[0] * massone;
|
||||
l_cmy += unwrapKK[1] * massone;
|
||||
l_cmz += unwrapKK[2] * massone;
|
||||
}
|
||||
}, cmone[0], cmone[1], cmone[2]);
|
||||
|
||||
} else {
|
||||
|
||||
auto d_mass = atomKK->k_mass.template view<DeviceType>();
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
|
||||
atomKK->k_mass.template sync<DeviceType>();
|
||||
|
||||
// same reduction, with mass looked up by atom type
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_cmx, double &l_cmy, double &l_cmz) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_mass(d_type(i));
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = d_x(i,0);
|
||||
x_i[1] = d_x(i,1);
|
||||
x_i[2] = d_x(i,2);
|
||||
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
|
||||
l_cmx += unwrapKK[0] * massone;
|
||||
l_cmy += unwrapKK[1] * massone;
|
||||
l_cmz += unwrapKK[2] * massone;
|
||||
}
|
||||
}, cmone[0], cmone[1], cmone[2]);
|
||||
|
||||
}
|
||||
|
||||
// sum the partial results of all ranks directly into the caller's array
MPI_Allreduce(cmone, xcm, 3, MPI_DOUBLE, MPI_SUM, world);
|
||||
// normalize only for positive total mass, avoiding a division by zero
if (masstotal > 0.0) {
|
||||
xcm[0] /= masstotal;
|
||||
xcm[1] /= masstotal;
|
||||
xcm[2] /= masstotal;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute the center-of-mass velocity of group of atoms
|
||||
masstotal = total mass
|
||||
return center-of-mass velocity in vcm[]
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
// Center-of-mass velocity of the atoms in group `igroup`.
//   igroup    : index into bitmask[] selecting the group
//   masstotal : total mass of the group, used as the divisor
//   vcm       : output, 3 doubles; receives the mass-weighted velocity sum
//               reduced over `world`, divided by masstotal when masstotal > 0.0
//               (left as the raw reduced sum otherwise)
template<class DeviceType>
|
||||
void vcm_kk(int igroup, double masstotal, double *vcm)
|
||||
{
|
||||
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
int groupbit = bitmask[igroup];
|
||||
auto d_v = atomKK->k_v.template view<DeviceType>();
|
||||
auto d_mask = atomKK->k_mask.template view<DeviceType>();
|
||||
// NOTE(review): d_image is not referenced by either kernel below, yet IMAGE_MASK
// is included in both sync calls — looks unnecessary here; confirm before removing.
auto d_image = atomKK->k_image.template view<DeviceType>();
|
||||
// per-rank sums of mass * velocity (x, y, z)
double p[3] = {0.0, 0.0, 0.0};
|
||||
|
||||
if (atomKK->rmass) {
|
||||
|
||||
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
|
||||
|
||||
// three-scalar reduction using per-atom rmass
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_rmass(i);
|
||||
l_px += d_v(i,0) * massone;
|
||||
l_py += d_v(i,1) * massone;
|
||||
l_pz += d_v(i,2) * massone;
|
||||
}
|
||||
}, p[0], p[1], p[2]);
|
||||
|
||||
} else {
|
||||
|
||||
auto d_mass = atomKK->k_mass.template view<DeviceType>();
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
|
||||
atomKK->k_mass.template sync<DeviceType>();
|
||||
|
||||
// same reduction, with mass looked up by atom type
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_mass(d_type(i));
|
||||
l_px += d_v(i,0) * massone;
|
||||
l_py += d_v(i,1) * massone;
|
||||
l_pz += d_v(i,2) * massone;
|
||||
}
|
||||
}, p[0], p[1], p[2]);
|
||||
|
||||
}
|
||||
|
||||
// sum the partial results of all ranks directly into the caller's array
MPI_Allreduce(p, vcm, 3, MPI_DOUBLE, MPI_SUM, world);
|
||||
// normalize only for positive total mass, avoiding a division by zero
if (masstotal > 0.0) {
|
||||
vcm[0] /= masstotal;
|
||||
vcm[1] /= masstotal;
|
||||
vcm[2] /= masstotal;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute the angular momentum L (lmom) of group
|
||||
around center-of-mass cm
|
||||
must unwrap atoms to compute L correctly
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
// Angular momentum of the atoms in group `igroup` about the point `xcm`.
//   igroup : index into bitmask[] selecting the group
//   xcm    : input, 3 doubles; reference point (center of mass) for the displacements
//   lmom   : output, 3 doubles; sum over group atoms of mass * (d x v), where d is
//            the unmapped position minus xcm, reduced over `world` with MPI_SUM
template<class DeviceType>
|
||||
void angmom_kk(int igroup, double *xcm, double *lmom)
|
||||
{
|
||||
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
int groupbit = bitmask[igroup];
|
||||
auto d_x = atomKK->k_x.template view<DeviceType>();
|
||||
auto d_v = atomKK->k_v.template view<DeviceType>();
|
||||
auto d_mask = atomKK->k_mask.template view<DeviceType>();
|
||||
auto d_image = atomKK->k_image.template view<DeviceType>();
|
||||
// copy box data and xcm into local values so the lambdas capture them by value
// instead of dereferencing the host pointers inside the kernel
auto l_prd = Few<double, 3>(domain->prd);
|
||||
auto l_h = Few<double, 6>(domain->h);
|
||||
auto l_triclinic = domain->triclinic;
|
||||
auto l_xcm0 = xcm[0];
|
||||
auto l_xcm1 = xcm[1];
|
||||
auto l_xcm2 = xcm[2];
|
||||
// per-rank angular momentum components (x, y, z)
double p[3] = {0.0, 0.0, 0.0};
|
||||
|
||||
if (atomKK->rmass) {
|
||||
|
||||
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
|
||||
|
||||
// three-scalar reduction using per-atom rmass
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_rmass(i);
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = d_x(i,0);
|
||||
x_i[1] = d_x(i,1);
|
||||
x_i[2] = d_x(i,2);
|
||||
// presumably the unwrapped position derived from the image value — unmap() is defined elsewhere
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
|
||||
// displacement from the reference point
double dx = unwrapKK[0] - l_xcm0;
|
||||
double dy = unwrapKK[1] - l_xcm1;
|
||||
double dz = unwrapKK[2] - l_xcm2;
|
||||
// components of mass * (d x v)
l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1));
|
||||
l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2));
|
||||
l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0));
|
||||
}
|
||||
}, p[0], p[1], p[2]);
|
||||
|
||||
} else {
|
||||
|
||||
auto d_mass = atomKK->k_mass.template view<DeviceType>();
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
|
||||
atomKK->k_mass.template sync<DeviceType>();
|
||||
|
||||
// same reduction, with mass looked up by atom type
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_px, double &l_py, double &l_pz) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_mass(d_type(i));
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = d_x(i,0);
|
||||
x_i[1] = d_x(i,1);
|
||||
x_i[2] = d_x(i,2);
|
||||
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
|
||||
double dx = unwrapKK[0] - l_xcm0;
|
||||
double dy = unwrapKK[1] - l_xcm1;
|
||||
double dz = unwrapKK[2] - l_xcm2;
|
||||
l_px += massone * (dy * d_v(i,2) - dz * d_v(i,1));
|
||||
l_py += massone * (dz * d_v(i,0) - dx * d_v(i,2));
|
||||
l_pz += massone * (dx * d_v(i,1) - dy * d_v(i,0));
|
||||
}
|
||||
}, p[0], p[1], p[2]);
|
||||
|
||||
}
|
||||
// sum the partial results of all ranks directly into the caller's array
MPI_Allreduce(p, lmom, 3, MPI_DOUBLE, MPI_SUM, world);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute moment of inertia tensor around center-of-mass xcm of group
|
||||
must unwrap atoms to compute itensor correctly
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
// Moment of inertia tensor of the atoms in group `igroup` about the point `xcm`.
//   igroup  : index into bitmask[] selecting the group
//   xcm     : input, 3 doubles; reference point (center of mass) for the displacements
//   itensor : output, 3x3; per-rank tensors summed over `world` with MPI_SUM
// Only the six independent components are accumulated in the kernel; the
// remaining three are mirrored on the host before the MPI reduction.
template<class DeviceType>
|
||||
void inertia_kk(int igroup, double *xcm, double itensor[3][3])
|
||||
{
|
||||
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
int groupbit = bitmask[igroup];
|
||||
auto d_x = atomKK->k_x.template view<DeviceType>();
|
||||
auto d_mask = atomKK->k_mask.template view<DeviceType>();
|
||||
auto d_image = atomKK->k_image.template view<DeviceType>();
|
||||
// copy box data and xcm into local values so the lambdas capture them by value
// instead of dereferencing the host pointers inside the kernel
auto l_prd = Few<double, 3>(domain->prd);
|
||||
auto l_h = Few<double, 6>(domain->h);
|
||||
auto l_triclinic = domain->triclinic;
|
||||
auto l_xcm0 = xcm[0];
|
||||
auto l_xcm1 = xcm[1];
|
||||
auto l_xcm2 = xcm[2];
|
||||
|
||||
// per-rank tensor, zero-initialized before the reduction
double ione[3][3];
|
||||
for (int i = 0; i < 3; i++)
|
||||
for (int j = 0; j < 3; j++) ione[i][j] = 0.0;
|
||||
|
||||
if (atomKK->rmass) {
|
||||
|
||||
auto d_rmass = atomKK->k_rmass.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|RMASS_MASK);
|
||||
|
||||
// six-scalar reduction (diagonal, then 01, 12, 02) using per-atom rmass
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_rmass(i);
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = d_x(i,0);
|
||||
x_i[1] = d_x(i,1);
|
||||
x_i[2] = d_x(i,2);
|
||||
// presumably the unwrapped position derived from the image value — unmap() is defined elsewhere
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
|
||||
// displacement from the reference point
double dx = unwrapKK[0] - l_xcm0;
|
||||
double dy = unwrapKK[1] - l_xcm1;
|
||||
double dz = unwrapKK[2] - l_xcm2;
|
||||
// diagonal terms add mass * (sum of the two other squared components)
l_i00 += massone * (dy * dy + dz * dz);
|
||||
l_i11 += massone * (dx * dx + dz * dz);
|
||||
l_i22 += massone * (dx * dx + dy * dy);
|
||||
// off-diagonal terms subtract mass * (product of the two components)
l_i01 -= massone * dx * dy;
|
||||
l_i12 -= massone * dy * dz;
|
||||
l_i02 -= massone * dx * dz;
|
||||
}
|
||||
}, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]);
|
||||
|
||||
} else {
|
||||
|
||||
auto d_mass = atomKK->k_mass.template view<DeviceType>();
|
||||
auto d_type = atomKK->k_type.template view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK|MASK_MASK|IMAGE_MASK|TYPE_MASK);
|
||||
atomKK->k_mass.template sync<DeviceType>();
|
||||
|
||||
// same reduction, with mass looked up by atom type
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,atom->nlocal), KOKKOS_LAMBDA(const int i, double &l_i00, double &l_i11, double &l_i22, double &l_i01, double &l_i12, double &l_i02) {
|
||||
if (d_mask(i) & groupbit) {
|
||||
double massone = d_mass(d_type(i));
|
||||
Few<double,3> x_i;
|
||||
x_i[0] = d_x(i,0);
|
||||
x_i[1] = d_x(i,1);
|
||||
x_i[2] = d_x(i,2);
|
||||
auto unwrapKK = DomainKokkos::unmap(l_prd,l_h,l_triclinic,x_i,d_image(i));
|
||||
double dx = unwrapKK[0] - l_xcm0;
|
||||
double dy = unwrapKK[1] - l_xcm1;
|
||||
double dz = unwrapKK[2] - l_xcm2;
|
||||
l_i00 += massone * (dy * dy + dz * dz);
|
||||
l_i11 += massone * (dx * dx + dz * dz);
|
||||
l_i22 += massone * (dx * dx + dy * dy);
|
||||
l_i01 -= massone * dx * dy;
|
||||
l_i12 -= massone * dy * dz;
|
||||
l_i02 -= massone * dx * dz;
|
||||
}
|
||||
}, ione[0][0], ione[1][1], ione[2][2], ione[0][1], ione[1][2], ione[0][2]);
|
||||
|
||||
}
|
||||
|
||||
// mirror the three accumulated off-diagonal entries to make the tensor symmetric
ione[1][0] = ione[0][1];
|
||||
ione[2][1] = ione[1][2];
|
||||
ione[2][0] = ione[0][2];
|
||||
// reduce all 9 entries of the contiguous 3x3 array across ranks
MPI_Allreduce(&ione[0][0], &itensor[0][0], 9, MPI_DOUBLE, MPI_SUM, world);
|
||||
}
|
||||
|
||||
private:
|
||||
ExecutionSpace execution_space;
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
@ -74,14 +74,14 @@ void ImproperHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
// reallocate per-atom arrays if necessary
|
||||
|
||||
if (eflag_atom) {
|
||||
if(k_eatom.extent(0) < maxeatom) {
|
||||
if ((int)k_eatom.extent(0) < maxeatom) {
|
||||
memoryKK->destroy_kokkos(k_eatom,eatom);
|
||||
memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom");
|
||||
d_eatom = k_eatom.template view<KKDeviceType>();
|
||||
} else Kokkos::deep_copy(d_eatom,0.0);
|
||||
}
|
||||
if (vflag_atom) {
|
||||
if(k_vatom.extent(0) < maxvatom) {
|
||||
if ((int)k_vatom.extent(0) < maxvatom) {
|
||||
memoryKK->destroy_kokkos(k_vatom,vatom);
|
||||
memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"improper:vatom");
|
||||
d_vatom = k_vatom.template view<KKDeviceType>();
|
||||
|
||||
@ -77,7 +77,7 @@ void ImproperHybridKokkos::compute(int eflag, int vflag)
|
||||
|
||||
Kokkos::parallel_for(nimproperlist_orig,LAMMPS_LAMBDA(int i) {
|
||||
const int m = d_map[d_improperlist_orig(i,4)];
|
||||
if (m >= 0) Kokkos::atomic_increment(&d_nimproperlist[m]);
|
||||
if (m >= 0) Kokkos::atomic_inc(&d_nimproperlist[m]);
|
||||
});
|
||||
|
||||
k_nimproperlist.modify_device();
|
||||
@ -88,7 +88,7 @@ void ImproperHybridKokkos::compute(int eflag, int vflag)
|
||||
if (h_nimproperlist[m] > maximproper_all)
|
||||
maximproper_all = h_nimproperlist[m] + EXTRA;
|
||||
|
||||
if (k_improperlist.d_view.extent(1) < maximproper_all)
|
||||
if ((int)k_improperlist.d_view.extent(1) < maximproper_all)
|
||||
MemKK::realloc_kokkos(k_improperlist, "improper_hybrid:improperlist", nstyles, maximproper_all, 5);
|
||||
auto d_improperlist = k_improperlist.d_view;
|
||||
|
||||
|
||||
@ -221,15 +221,19 @@ TYPE create_kokkos(TYPE &data, typename TYPE::value_type ***&array,
|
||||
int n1, int n2, int n3, const char *name)
|
||||
{
|
||||
data = TYPE(std::string(name),n1,n2,n3);
|
||||
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1;
|
||||
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1 * n2;
|
||||
typename TYPE::value_type **plane = (typename TYPE::value_type **) smalloc(nbytes,name);
|
||||
nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1;
|
||||
array = (typename TYPE::value_type ***) smalloc(nbytes,name);
|
||||
|
||||
bigint m;
|
||||
for (int i = 0; i < n1; i++) {
|
||||
if (n2 == 0) {
|
||||
array[i] = nullptr;
|
||||
} else {
|
||||
nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n2;
|
||||
array[i] = (typename TYPE::value_type **) smalloc(nbytes,name);
|
||||
m = ((bigint) i) * n2;
|
||||
array[i] = &plane[m];
|
||||
|
||||
for (int j = 0; j < n2; j++) {
|
||||
if (n3 == 0)
|
||||
array[i][j] = nullptr;
|
||||
@ -248,15 +252,19 @@ template <typename TYPE, typename HTYPE>
|
||||
{
|
||||
data = TYPE(std::string(name),n1,n2);
|
||||
h_data = Kokkos::create_mirror_view(data);
|
||||
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1;
|
||||
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1 * n2;
|
||||
typename TYPE::value_type **plane = (typename TYPE::value_type **) smalloc(nbytes,name);
|
||||
nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1;
|
||||
array = (typename TYPE::value_type ***) smalloc(nbytes,name);
|
||||
|
||||
bigint m;
|
||||
for (int i = 0; i < n1; i++) {
|
||||
if (n2 == 0) {
|
||||
array[i] = nullptr;
|
||||
} else {
|
||||
nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n2;
|
||||
array[i] = (typename TYPE::value_type **) smalloc(nbytes,name);
|
||||
m = ((bigint) i) * n2;
|
||||
array[i] = &plane[m];
|
||||
|
||||
for (int j = 0; j < n2; j++) {
|
||||
if (n3 == 0)
|
||||
array[i][j] = nullptr;
|
||||
@ -288,15 +296,19 @@ TYPE grow_kokkos(TYPE &data, typename TYPE::value_type ***&array,
|
||||
{
|
||||
if (array == nullptr) return create_kokkos(data,array,n1,n2,n3,name);
|
||||
data.resize(n1,n2,n3);
|
||||
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1;
|
||||
array = (typename TYPE::value_type ***) smalloc(nbytes,name);
|
||||
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1 * n2;
|
||||
typename TYPE::value_type **plane = (typename TYPE::value_type **) srealloc(array[0],nbytes,name);
|
||||
nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1;
|
||||
array = (typename TYPE::value_type ***) srealloc(array,nbytes,name);
|
||||
|
||||
bigint m;
|
||||
for (int i = 0; i < n1; i++) {
|
||||
if (n2 == 0) {
|
||||
array[i] = nullptr;
|
||||
} else {
|
||||
nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n2;
|
||||
array[i] = (typename TYPE::value_type **) smalloc(nbytes,name);
|
||||
m = ((bigint) i) * n2;
|
||||
array[i] = &plane[m];
|
||||
|
||||
for (int j = 0; j < n2; j++) {
|
||||
if (n3 == 0)
|
||||
array[i][j] = nullptr;
|
||||
@ -316,10 +328,9 @@ template <typename TYPE>
|
||||
void destroy_kokkos(TYPE data, typename TYPE::value_type*** &array)
|
||||
{
|
||||
if (array == nullptr) return;
|
||||
int n1 = data.extent(0);
|
||||
for (int i = 0; i < n1; ++i)
|
||||
sfree(array[i]);
|
||||
data = TYPE();
|
||||
|
||||
sfree(array[0]);
|
||||
sfree(array);
|
||||
array = nullptr;
|
||||
}
|
||||
@ -411,7 +422,7 @@ template <typename TYPE>
|
||||
TYPE create_kokkos(TYPE &data, int n1, int n2, int n3, int n4, int n5 , int n6 ,const char *name)
|
||||
{
|
||||
data = TYPE();
|
||||
data = TYPE(std::string(name) ,n1,n2,n3,n4,n5,n6);
|
||||
data = TYPE(std::string(name),n1,n2,n3,n4,n5,n6);
|
||||
return data;
|
||||
}
|
||||
|
||||
@ -420,4 +431,3 @@ TYPE create_kokkos(TYPE &data, int n1, int n2, int n3, int n4, int n5 , int n6 ,
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@ -145,13 +145,13 @@ void MLIAPDataKokkos<DeviceType>::generate_neighdata(class NeighList *list_in, i
|
||||
auto type = atomKK->k_type.view<DeviceType>();
|
||||
auto map=k_pairmliap->k_map.template view<DeviceType>();
|
||||
|
||||
Kokkos::parallel_scan(natomneigh, KOKKOS_LAMBDA (int ii, int &update, const bool final) {
|
||||
Kokkos::parallel_scan(Kokkos::RangePolicy<DeviceType>(0,natomneigh), KOKKOS_LAMBDA (int ii, int &update, const bool final) {
|
||||
if (final)
|
||||
d_ij(ii) = update;
|
||||
update += d_numneighs(ii);
|
||||
});
|
||||
|
||||
Kokkos::parallel_for(natomneigh, KOKKOS_LAMBDA (int ii) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,natomneigh), KOKKOS_LAMBDA (int ii) {
|
||||
int ij = d_ij(ii);
|
||||
const int i = d_ilist[ii];
|
||||
const double xtmp = x(i, 0);
|
||||
@ -183,7 +183,7 @@ void MLIAPDataKokkos<DeviceType>::generate_neighdata(class NeighList *list_in, i
|
||||
d_ielems[ii] = ielem;
|
||||
});
|
||||
|
||||
Kokkos::parallel_for(nmax, KOKKOS_LAMBDA (int i) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nmax), KOKKOS_LAMBDA (int i) {
|
||||
const int itype = type(i);
|
||||
d_elems(i) = map(itype);
|
||||
});
|
||||
@ -225,7 +225,7 @@ void MLIAPDataKokkos<DeviceType>::grow_neigharrays() {
|
||||
auto d_cutsq=k_pairmliap->k_cutsq.template view<DeviceType>();
|
||||
auto h_cutsq=k_pairmliap->k_cutsq.template view<LMPHostType>();
|
||||
auto d_numneighs = k_numneighs.template view<DeviceType>();
|
||||
Kokkos::parallel_reduce(natomneigh, KOKKOS_LAMBDA (int ii, int &contrib) {
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,natomneigh), KOKKOS_LAMBDA (int ii, int &contrib) {
|
||||
const int i = d_ilist[ii];
|
||||
int count=0;
|
||||
const double xtmp = x(i, 0);
|
||||
|
||||
@ -75,7 +75,7 @@ void MLIAPDescriptorSO3Kokkos<DeviceType>::compute_forces(class MLIAPData *data_
|
||||
Kokkos::View<double[6], DeviceType> virial("virial");
|
||||
data->k_pairmliap->k_vatom.template modify<LMPHostType>();
|
||||
data->k_pairmliap->k_vatom.template sync<DeviceType>();
|
||||
Kokkos::parallel_for(data->nlistatoms, KOKKOS_LAMBDA(int ii) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,data->nlistatoms), KOKKOS_LAMBDA(int ii) {
|
||||
double fij[3];
|
||||
const int i = d_iatoms(ii);
|
||||
|
||||
@ -187,7 +187,7 @@ void MLIAPDescriptorSO3Kokkos<DeviceType>::compute_force_gradients(class MLIAPDa
|
||||
|
||||
auto yoffset = data->yoffset, zoffset = data->zoffset, gamma_nnz = data->gamma_nnz;
|
||||
|
||||
Kokkos::parallel_for (data->nlistatoms, KOKKOS_LAMBDA (int ii) {
|
||||
Kokkos::parallel_for (Kokkos::RangePolicy<DeviceType>(0,data->nlistatoms), KOKKOS_LAMBDA (int ii) {
|
||||
const int i = d_iatoms(ii);
|
||||
|
||||
// ensure rij, inside, wj, and rcutij are of size jnum
|
||||
|
||||
@ -147,7 +147,7 @@ void PairMEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
auto l_numneigh_half = d_numneigh_half;
|
||||
auto l_offset = d_offset;
|
||||
|
||||
Kokkos::parallel_scan(inum_half, LAMMPS_LAMBDA(int ii, int &m_fill, bool final) {
|
||||
Kokkos::parallel_scan(Kokkos::RangePolicy<DeviceType>(0,inum_half), LAMMPS_LAMBDA(int ii, int &m_fill, bool final) {
|
||||
int i = l_ilist_half[ii];
|
||||
m_fill += l_numneigh_half[i];
|
||||
if (final)
|
||||
|
||||
@ -302,7 +302,7 @@ void PairMLIAPKokkos<DeviceType>::e_tally(MLIAPData* data)
|
||||
auto d_iatoms = k_data->k_iatoms.template view<DeviceType>();
|
||||
auto d_eatoms = k_data->k_eatoms.template view<DeviceType>();
|
||||
auto d_eatom = k_eatom.template view<DeviceType>();
|
||||
Kokkos::parallel_for(data->nlistatoms, KOKKOS_LAMBDA (int ii) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,data->nlistatoms), KOKKOS_LAMBDA (int ii) {
|
||||
d_eatom(d_iatoms(ii)) = d_eatoms(ii);
|
||||
});
|
||||
k_eatom.modify<DeviceType>();
|
||||
|
||||
@ -532,7 +532,7 @@ int PairPODKokkos<DeviceType>::NeighborCount(t_pod_1i l_numij, double l_rcutsq,
|
||||
auto l_neighbors = d_neighbors;
|
||||
|
||||
// compute number of pairs for each atom i
|
||||
Kokkos::parallel_for("NeighborCount", Kokkos::TeamPolicy<>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) {
|
||||
Kokkos::parallel_for("NeighborCount", typename Kokkos::TeamPolicy<DeviceType>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team) {
|
||||
int i = team.league_rank();
|
||||
int gi = l_ilist(gi1 + i);
|
||||
double xi0 = l_x(gi, 0);
|
||||
@ -555,7 +555,7 @@ int PairPODKokkos<DeviceType>::NeighborCount(t_pod_1i l_numij, double l_rcutsq,
|
||||
});
|
||||
|
||||
// cumulative sum
|
||||
Kokkos::parallel_scan("InclusivePrefixSum", Ni + 1, KOKKOS_LAMBDA(int i, int& update, const bool final) {
|
||||
Kokkos::parallel_scan("InclusivePrefixSum", Kokkos::RangePolicy<DeviceType>(0,Ni + 1), KOKKOS_LAMBDA(int i, int& update, const bool final) {
|
||||
if (i > 0) {
|
||||
update += l_numij(i);
|
||||
if (final) {
|
||||
@ -582,7 +582,7 @@ void PairPODKokkos<DeviceType>::NeighborList(t_pod_1d l_rij, t_pod_1i l_numij,
|
||||
auto l_map = d_map;
|
||||
auto l_type = type;
|
||||
|
||||
Kokkos::parallel_for("NeighborList", Kokkos::TeamPolicy<>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) {
|
||||
Kokkos::parallel_for("NeighborList", typename Kokkos::TeamPolicy<DeviceType>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team) {
|
||||
int i = team.league_rank();
|
||||
int gi = l_ilist(gi1 + i);
|
||||
double xi0 = l_x(gi, 0);
|
||||
@ -622,7 +622,7 @@ void PairPODKokkos<DeviceType>::radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod
|
||||
t_pod_1d l_rij, t_pod_1d l_besselparams, double l_rin, double l_rmax, int l_besseldegree,
|
||||
int l_inversedegree, int l_nbesselpars, int Nij)
|
||||
{
|
||||
Kokkos::parallel_for("ComputeRadialBasis", Nij, KOKKOS_LAMBDA(int n) {
|
||||
Kokkos::parallel_for("ComputeRadialBasis", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int n) {
|
||||
double xij1 = l_rij(0+3*n);
|
||||
double xij2 = l_rij(1+3*n);
|
||||
double xij3 = l_rij(2+3*n);
|
||||
@ -722,7 +722,7 @@ void PairPODKokkos<DeviceType>::radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod
|
||||
template<class DeviceType>
|
||||
void PairPODKokkos<DeviceType>::matrixMultiply(t_pod_1d a, t_pod_1d b, t_pod_1d c, int r1, int c1, int c2)
|
||||
{
|
||||
Kokkos::parallel_for("MatrixMultiply", r1 * c2, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("MatrixMultiply", Kokkos::RangePolicy<DeviceType>(0,r1 * c2), KOKKOS_LAMBDA(int idx) {
|
||||
int j = idx / r1; // Calculate column index
|
||||
int i = idx % r1; // Calculate row index
|
||||
double sum = 0.0;
|
||||
@ -737,7 +737,7 @@ template<class DeviceType>
|
||||
void PairPODKokkos<DeviceType>::angularbasis(t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz,
|
||||
t_pod_1d l_rij, t_pod_1i l_pq3, int l_K3, int N)
|
||||
{
|
||||
Kokkos::parallel_for("AngularBasis", N, KOKKOS_LAMBDA(int j) {
|
||||
Kokkos::parallel_for("AngularBasis", Kokkos::RangePolicy<DeviceType>(0,N), KOKKOS_LAMBDA(int j) {
|
||||
double x = l_rij(j*3 + 0);
|
||||
double y = l_rij(j*3 + 1);
|
||||
double z = l_rij(j*3 + 2);
|
||||
@ -817,7 +817,7 @@ void PairPODKokkos<DeviceType>::radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf
|
||||
{
|
||||
int totalIterations = l_nrbf3 * l_K3 * Ni;
|
||||
if (l_nelements==1) {
|
||||
Kokkos::parallel_for("RadialAngularSum", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("RadialAngularSum", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int k = idx % l_K3;
|
||||
int temp = idx / l_K3;
|
||||
int m = temp % l_nrbf3;
|
||||
@ -835,7 +835,7 @@ void PairPODKokkos<DeviceType>::radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf
|
||||
});
|
||||
}
|
||||
else {
|
||||
Kokkos::parallel_for("RadialAngularSum", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("RadialAngularSum", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int k = idx % l_K3;
|
||||
int temp = idx / l_K3;
|
||||
int m = temp % l_nrbf3;
|
||||
@ -863,7 +863,7 @@ void PairPODKokkos<DeviceType>::twobodydesc(t_pod_1d d2, t_pod_1d l_rbf, t_pod_
|
||||
int l_nrbf2, const int Ni, const int Nij)
|
||||
{
|
||||
int totalIterations = l_nrbf2 * Nij;
|
||||
Kokkos::parallel_for("twobodydesc", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("twobodydesc", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int n = idx / l_nrbf2; // pair index
|
||||
int m = idx % l_nrbf2; // rbd index
|
||||
int i2 = n + Nij * m; // Index of the radial basis function for atom n and RBF m
|
||||
@ -876,7 +876,7 @@ void PairPODKokkos<DeviceType>::twobody_forces(t_pod_1d fij, t_pod_1d cb2, t_pod
|
||||
t_pod_1d l_rbfz, t_pod_1i l_idxi, t_pod_1i l_tj, int l_nrbf2, const int Ni, const int Nij)
|
||||
{
|
||||
int totalIterations = l_nrbf2 * Nij;
|
||||
Kokkos::parallel_for("twobody_forces", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("twobody_forces", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int n = idx / l_nrbf2; // pair index
|
||||
int m = idx % l_nrbf2; // rbd index
|
||||
int i2 = n + Nij * m; // Index of the radial basis function for atom n and RBF m
|
||||
@ -893,7 +893,7 @@ void PairPODKokkos<DeviceType>::threebodydesc(t_pod_1d d3, t_pod_1d l_sumU, t_po
|
||||
int l_nelements, int l_nrbf3, int l_nabf3, int l_K3, const int Ni)
|
||||
{
|
||||
int totalIterations = l_nrbf3 * Ni;
|
||||
Kokkos::parallel_for("ThreeBodyDesc", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("ThreeBodyDesc", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int m = idx % l_nrbf3;
|
||||
int i = idx / l_nrbf3;
|
||||
int nmi = l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3*i;
|
||||
@ -925,7 +925,7 @@ void PairPODKokkos<DeviceType>::threebody_forces(t_pod_1d fij, t_pod_1d cb3, t_p
|
||||
{
|
||||
int totalIterations = l_nrbf3 * Nij;
|
||||
if (l_nelements==1) {
|
||||
Kokkos::parallel_for("threebody_forces1", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("threebody_forces1", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int j = idx / l_nrbf3; // Calculate j using integer division
|
||||
int m = idx % l_nrbf3; // Calculate m using modulo operation
|
||||
int idxR = j + Nij * m; // Pre-compute the index for rbf
|
||||
@ -961,7 +961,7 @@ void PairPODKokkos<DeviceType>::threebody_forces(t_pod_1d fij, t_pod_1d cb3, t_p
|
||||
}
|
||||
else {
|
||||
int N3 = Ni * l_nabf3 * l_nrbf3;
|
||||
Kokkos::parallel_for("threebody_forces2", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("threebody_forces2", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int j = idx / l_nrbf3; // Derive the original j value
|
||||
int m = idx % l_nrbf3; // Derive the original m value
|
||||
int i2 = l_tj(j) - 1;
|
||||
@ -1007,7 +1007,7 @@ void PairPODKokkos<DeviceType>::threebody_forcecoeff(t_pod_1d fb3, t_pod_1d cb3,
|
||||
{
|
||||
int totalIterations = l_nrbf3 * Ni;
|
||||
if (l_nelements==1) {
|
||||
Kokkos::parallel_for("threebody_forcecoeff1", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("threebody_forcecoeff1", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int i = idx / l_nrbf3; // Calculate j using integer division
|
||||
int m = idx % l_nrbf3; // Calculate m using modulo operation
|
||||
for (int p = 0; p < l_nabf3; p++) {
|
||||
@ -1024,7 +1024,7 @@ void PairPODKokkos<DeviceType>::threebody_forcecoeff(t_pod_1d fb3, t_pod_1d cb3,
|
||||
}
|
||||
else {
|
||||
int N3 = Ni * l_nabf3 * l_nrbf3;
|
||||
Kokkos::parallel_for("threebody_forcecoeff2", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("threebody_forcecoeff2", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int i = idx / l_nrbf3; // Derive the original j value
|
||||
int m = idx % l_nrbf3; // Derive the original m value
|
||||
for (int p = 0; p < l_nabf3; p++) {
|
||||
@ -1054,7 +1054,7 @@ void PairPODKokkos<DeviceType>::fourbodydesc(t_pod_1d d4, t_pod_1d l_sumU, t_po
|
||||
t_pod_1i l_pc4, int l_nelements, int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni)
|
||||
{
|
||||
int totalIterations = l_nrbf4 * Ni;
|
||||
Kokkos::parallel_for("fourbodydesc", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("fourbodydesc", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int m = idx % l_nrbf4;
|
||||
int i = idx / l_nrbf4;
|
||||
int idxU = l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * i;
|
||||
@ -1092,7 +1092,7 @@ void PairPODKokkos<DeviceType>::fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_po
|
||||
{
|
||||
int totalIterations = l_nrbf4 * Nij;
|
||||
if (l_nelements==1) {
|
||||
Kokkos::parallel_for("fourbody_forces1", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("fourbody_forces1", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int j = idx / l_nrbf4; // Derive the original j value
|
||||
int m = idx % l_nrbf4; // Derive the original m value
|
||||
int idxU = l_K3 * m + l_K3*l_nrbf3*l_idxi(j);
|
||||
@ -1151,7 +1151,7 @@ void PairPODKokkos<DeviceType>::fourbody_forces(t_pod_1d fij, t_pod_1d cb4, t_po
|
||||
}
|
||||
else {
|
||||
int N3 = Ni * l_nabf4 * l_nrbf4;
|
||||
Kokkos::parallel_for("fourbody_forces2", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("fourbody_forces2", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int j = idx / l_nrbf4; // Derive the original j value
|
||||
int m = idx % l_nrbf4; // Derive the original m value
|
||||
int idxM = j + Nij * m;
|
||||
@ -1241,7 +1241,7 @@ void PairPODKokkos<DeviceType>::fourbody_forcecoeff(t_pod_1d fb4, t_pod_1d cb4,
|
||||
{
|
||||
int totalIterations = l_nrbf4 * Ni;
|
||||
if (l_nelements==1) {
|
||||
Kokkos::parallel_for("fourbody_forcecoeff1", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("fourbody_forcecoeff1", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int i = idx / l_nrbf4; // Derive the original j value
|
||||
int m = idx % l_nrbf4; // Derive the original m value
|
||||
int idxU = l_K3 * m + l_K3*l_nrbf3*i;
|
||||
@ -1268,7 +1268,7 @@ void PairPODKokkos<DeviceType>::fourbody_forcecoeff(t_pod_1d fb4, t_pod_1d cb4,
|
||||
}
|
||||
else {
|
||||
int N3 = Ni * l_nabf4 * l_nrbf4;
|
||||
Kokkos::parallel_for("fourbody_forcecoeff2", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("fourbody_forcecoeff2", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int i = idx / l_nrbf4; // Derive the original j value
|
||||
int m = idx % l_nrbf4; // Derive the original m value
|
||||
for (int p = 0; p < l_nabf4; p++) {
|
||||
@ -1311,7 +1311,7 @@ void PairPODKokkos<DeviceType>::allbody_forces(t_pod_1d fij, t_pod_1d l_forcecoe
|
||||
t_pod_1i l_idxi, t_pod_1i l_tj, int l_nelements, int l_nrbf3, int l_K3, int Nij)
|
||||
{
|
||||
int totalIterations = l_nrbf3 * Nij;
|
||||
Kokkos::parallel_for("allbody_forces", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("allbody_forces", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int j = idx / l_nrbf3; // Calculate j using integer division
|
||||
int m = idx % l_nrbf3; // Calculate m using modulo operation
|
||||
int i2 = l_tj(j) - 1;
|
||||
@ -1346,7 +1346,7 @@ template<class DeviceType>
|
||||
void PairPODKokkos<DeviceType>::crossdesc(t_pod_1d d12, t_pod_1d d1, t_pod_1d d2, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni)
|
||||
{
|
||||
int totalIterations = n12 * Ni;
|
||||
Kokkos::parallel_for("crossdesc", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("crossdesc", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int n = idx % Ni;
|
||||
int i = idx / Ni;
|
||||
|
||||
@ -1359,7 +1359,7 @@ void PairPODKokkos<DeviceType>::crossdesc_reduction(t_pod_1d cb1, t_pod_1d cb2,
|
||||
t_pod_1d d2, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni)
|
||||
{
|
||||
int totalIterations = n12 * Ni;
|
||||
Kokkos::parallel_for("crossdesc_reduction", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("crossdesc_reduction", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int n = idx % Ni; // Ni
|
||||
int m = idx / Ni; // n12
|
||||
int k1 = ind1(m); // dd1
|
||||
@ -1375,7 +1375,7 @@ void PairPODKokkos<DeviceType>::crossdesc_reduction(t_pod_1d cb1, t_pod_1d cb2,
|
||||
template<class DeviceType>
|
||||
void PairPODKokkos<DeviceType>::set_array_to_zero(t_pod_1d a, int N)
|
||||
{
|
||||
Kokkos::parallel_for("initialize_array", N, KOKKOS_LAMBDA(int i) {
|
||||
Kokkos::parallel_for("initialize_array", Kokkos::RangePolicy<DeviceType>(0,N), KOKKOS_LAMBDA(int i) {
|
||||
a(i) = 0.0;
|
||||
});
|
||||
}
|
||||
@ -1480,7 +1480,7 @@ void PairPODKokkos<DeviceType>::blockatom_base_coefficients(t_pod_1d ei, t_pod_1
|
||||
int nDes = Mdesc;
|
||||
int nCoeff = nCoeffPerElement;
|
||||
|
||||
Kokkos::parallel_for("atomic_energies", Ni, KOKKOS_LAMBDA(int n) {
|
||||
Kokkos::parallel_for("atomic_energies", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int n) {
|
||||
int nc = nCoeff*(tyai[n]-1);
|
||||
ei[n] = cefs[0 + nc];
|
||||
for (int m=0; m<nDes; m++)
|
||||
@ -1488,7 +1488,7 @@ void PairPODKokkos<DeviceType>::blockatom_base_coefficients(t_pod_1d ei, t_pod_1
|
||||
});
|
||||
|
||||
int totalIterations = Ni*nDes;
|
||||
Kokkos::parallel_for("base_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("base_coefficients", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int n = idx % Ni;
|
||||
int m = idx / Ni;
|
||||
int nc = nCoeff*(tyai[n]-1);
|
||||
@ -1516,7 +1516,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
|
||||
int nCoeff = nCoeffPerElement;
|
||||
|
||||
int totalIterations = Ni*nCom;
|
||||
Kokkos::parallel_for("pca", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("pca", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int i = idx % Ni;
|
||||
int k = idx / Ni;
|
||||
double sum = 0.0;
|
||||
@ -1528,7 +1528,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
|
||||
});
|
||||
|
||||
totalIterations = Ni*nCls;
|
||||
Kokkos::parallel_for("inverse_square_distances", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("inverse_square_distances", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int i = idx % Ni;
|
||||
int j = idx / Ni;
|
||||
int typei = tyai[i]-1;
|
||||
@ -1541,14 +1541,14 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
|
||||
D[i + Ni*j] = 1.0 / sum;
|
||||
});
|
||||
|
||||
Kokkos::parallel_for("Probabilities", Ni, KOKKOS_LAMBDA(int i) {
|
||||
Kokkos::parallel_for("Probabilities", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int i) {
|
||||
double sum = 0;
|
||||
for (int j = 0; j < nCls; j++) sum += D[i + Ni*j];
|
||||
sumD[i] = sum;
|
||||
for (int j = 0; j < nCls; j++) P[i + Ni*j] = D[i + Ni*j]/sum;
|
||||
});
|
||||
|
||||
Kokkos::parallel_for("atomic_energies", Ni, KOKKOS_LAMBDA(int n) {
|
||||
Kokkos::parallel_for("atomic_energies", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int n) {
|
||||
int nc = nCoeff*(tyai[n]-1);
|
||||
ei[n] = cefs[0 + nc];
|
||||
for (int k = 0; k<nCls; k++)
|
||||
@ -1556,7 +1556,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
|
||||
ei[n] += cefs[1 + m + nDes*k + nc]*B[n + Ni*m]*P[n + Ni*k];
|
||||
});
|
||||
|
||||
Kokkos::parallel_for("env_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("env_coefficients", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int n = idx % Ni;
|
||||
int k = idx / Ni;
|
||||
int nc = nCoeff*(tyai[n]-1);
|
||||
@ -1567,7 +1567,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
|
||||
});
|
||||
|
||||
totalIterations = Ni*nDes;
|
||||
Kokkos::parallel_for("base_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("base_coefficients", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int n = idx % Ni;
|
||||
int m = idx / Ni;
|
||||
int nc = nCoeff*(tyai[n]-1);
|
||||
@ -1577,7 +1577,7 @@ void PairPODKokkos<DeviceType>::blockatom_environment_descriptors(t_pod_1d ei, t
|
||||
cb[n + Ni*m] = sum;
|
||||
});
|
||||
|
||||
Kokkos::parallel_for("base_env_coefficients", totalIterations, KOKKOS_LAMBDA(int idx) {
|
||||
Kokkos::parallel_for("base_env_coefficients", Kokkos::RangePolicy<DeviceType>(0,totalIterations), KOKKOS_LAMBDA(int idx) {
|
||||
int i = idx % Ni;
|
||||
int m = idx / Ni;
|
||||
int typei = tyai[i]-1;
|
||||
@ -1670,7 +1670,7 @@ template<class DeviceType>
|
||||
void PairPODKokkos<DeviceType>::tallyforce(t_pod_1d l_fij, t_pod_1i l_ai, t_pod_1i l_aj, int Nij)
|
||||
{
|
||||
auto l_f = f;
|
||||
Kokkos::parallel_for("TallyForce", Nij, KOKKOS_LAMBDA(int n) {
|
||||
Kokkos::parallel_for("TallyForce", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int n) {
|
||||
int im = l_ai(n);
|
||||
int jm = l_aj(n);
|
||||
int n3 = 3*n;
|
||||
@ -1694,7 +1694,7 @@ void PairPODKokkos<DeviceType>::tallyenergy(t_pod_1d l_ei, int istart, int Ni)
|
||||
// For global energy tally
|
||||
if (eflag_global) {
|
||||
double local_eng_vdwl = 0.0;
|
||||
Kokkos::parallel_reduce("GlobalEnergyTally", Ni, KOKKOS_LAMBDA(int k, E_FLOAT& update) {
|
||||
Kokkos::parallel_reduce("GlobalEnergyTally", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int k, E_FLOAT& update) {
|
||||
update += l_ei(k);
|
||||
}, local_eng_vdwl);
|
||||
|
||||
@ -1704,7 +1704,7 @@ void PairPODKokkos<DeviceType>::tallyenergy(t_pod_1d l_ei, int istart, int Ni)
|
||||
|
||||
// For per-atom energy tally
|
||||
if (eflag_atom) {
|
||||
Kokkos::parallel_for("PerAtomEnergyTally", Ni, KOKKOS_LAMBDA(int k) {
|
||||
Kokkos::parallel_for("PerAtomEnergyTally", Kokkos::RangePolicy<DeviceType>(0,Ni), KOKKOS_LAMBDA(int k) {
|
||||
l_eatom(istart + k) += l_ei(k);
|
||||
});
|
||||
}
|
||||
@ -1718,7 +1718,7 @@ void PairPODKokkos<DeviceType>::tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_po
|
||||
if (vflag_global) {
|
||||
for (int j=0; j<3; j++) {
|
||||
F_FLOAT sum = 0.0;
|
||||
Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) {
|
||||
Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) {
|
||||
int k3 = 3*k;
|
||||
update += l_rij(j + k3) * l_fij(j + k3);
|
||||
}, sum);
|
||||
@ -1726,21 +1726,21 @@ void PairPODKokkos<DeviceType>::tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_po
|
||||
}
|
||||
|
||||
F_FLOAT sum = 0.0;
|
||||
Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) {
|
||||
Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) {
|
||||
int k3 = 3*k;
|
||||
update += l_rij(k3) * l_fij(1 + k3);
|
||||
}, sum);
|
||||
virial[3] -= sum;
|
||||
|
||||
sum = 0.0;
|
||||
Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) {
|
||||
Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) {
|
||||
int k3 = 3*k;
|
||||
update += l_rij(k3) * l_fij(2 + k3);
|
||||
}, sum);
|
||||
virial[4] -= sum;
|
||||
|
||||
sum = 0.0;
|
||||
Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) {
|
||||
Kokkos::parallel_reduce("GlobalStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k, F_FLOAT& update) {
|
||||
int k3 = 3*k;
|
||||
update += l_rij(1+k3) * l_fij(2+k3);
|
||||
}, sum);
|
||||
@ -1748,7 +1748,7 @@ void PairPODKokkos<DeviceType>::tallystress(t_pod_1d l_fij, t_pod_1d l_rij, t_po
|
||||
}
|
||||
|
||||
if (vflag_atom) {
|
||||
Kokkos::parallel_for("PerAtomStressTally", Nij, KOKKOS_LAMBDA(int k) {
|
||||
Kokkos::parallel_for("PerAtomStressTally", Kokkos::RangePolicy<DeviceType>(0,Nij), KOKKOS_LAMBDA(int k) {
|
||||
int i = l_ai(k);
|
||||
int j = l_aj(k);
|
||||
int k3 = 3*k;
|
||||
|
||||
@ -385,13 +385,13 @@ void PairReaxFFKokkos<DeviceType>::init_md()
|
||||
swb = api->control->nonb_cut;
|
||||
enobondsflag = api->control->enobondsflag;
|
||||
|
||||
if (fabs(swa) > 0.01)
|
||||
error->warning(FLERR,"Warning: non-zero lower Taper-radius cutoff");
|
||||
if ((fabs(swa) > 0.01) && (comm->me == 0))
|
||||
error->warning(FLERR, "Non-zero lower Taper-radius cutoff");
|
||||
|
||||
if (swb < 0)
|
||||
error->one(FLERR,"Negative upper Taper-radius cutoff");
|
||||
else if (swb < 5)
|
||||
error->one(FLERR,"Warning: very low Taper-radius cutoff: {}\n", swb);
|
||||
if (swb < 0.0) {
|
||||
error->all(FLERR,"Negative upper Taper-radius cutoff");
|
||||
} else if ((swb < 5.0) && (comm->me ==0))
|
||||
error->warning(FLERR,"Very low Taper-radius cutoff: {}\n", swb);
|
||||
|
||||
d1 = swb - swa;
|
||||
d7 = powint(d1,7);
|
||||
|
||||
@ -39,7 +39,7 @@ RandPoolWrap::~RandPoolWrap()
|
||||
void RandPoolWrap::destroy()
|
||||
{
|
||||
if (random_thr) {
|
||||
for (int i=1; i < nthreads; ++i)
|
||||
for (int i = 1; i < nthreads; ++i)
|
||||
delete random_thr[i];
|
||||
|
||||
delete[] random_thr;
|
||||
@ -51,7 +51,7 @@ void RandPoolWrap::init(RanMars* random, int seed)
|
||||
{
|
||||
// deallocate pool of RNGs
|
||||
if (random_thr) {
|
||||
for (int i=1; i < this->nthreads; ++i)
|
||||
for (int i = 1; i < nthreads; ++i)
|
||||
delete random_thr[i];
|
||||
|
||||
delete[] random_thr;
|
||||
|
||||
@ -59,10 +59,12 @@ class RandPoolWrap : protected Pointers {
|
||||
typedef Kokkos::Experimental::UniqueToken<
|
||||
LMPHostType, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type;
|
||||
|
||||
#ifndef LMP_KOKKOS_GPU
|
||||
unique_token_type unique_token;
|
||||
int tid = (int) unique_token.acquire();
|
||||
rand_wrap.rng = random_thr[tid];
|
||||
unique_token.release(tid);
|
||||
#endif
|
||||
|
||||
return rand_wrap;
|
||||
}
|
||||
|
||||
@ -63,10 +63,9 @@ class RegSphereKokkos : public RegSphere, public KokkosBase {
|
||||
double xs, ys, zs;
|
||||
double xnear[3], xorig[3];
|
||||
|
||||
if (dynamic) {
|
||||
xorig[0] = x; xorig[1] = y; xorig[2] = z;
|
||||
xorig[0] = x; xorig[1] = y; xorig[2] = z;
|
||||
if (dynamic)
|
||||
inverse_transform(x, y, z);
|
||||
}
|
||||
|
||||
xnear[0] = x; xnear[1] = y; xnear[2] = z;
|
||||
|
||||
|
||||
@ -53,6 +53,8 @@ lmpinstalledpkgs.h
|
||||
lmpgitversion.h
|
||||
mliap_model_python_couple.cpp
|
||||
mliap_model_python_couple.h
|
||||
# removed in Dec 2024
|
||||
group_kokkos.cpp
|
||||
# renamed in September 2024
|
||||
group_ndx.cpp
|
||||
group_ndx.h
|
||||
|
||||
@ -80,15 +80,14 @@ namespace ReaxFF {
|
||||
swa = control->nonb_low;
|
||||
swb = control->nonb_cut;
|
||||
|
||||
if (fabs(swa) > 0.01 && control->me == 0)
|
||||
if ((fabs(swa) > 0.01) && (control->me == 0))
|
||||
error->warning(FLERR, "Non-zero lower Taper-radius cutoff");
|
||||
|
||||
if (swb < 0) {
|
||||
if (swb < 0.0) {
|
||||
error->all(FLERR,"Negative upper Taper-radius cutoff");
|
||||
}
|
||||
else if (swb < 5 && control->me == 0)
|
||||
error->warning(FLERR,fmt::format("Warning: very low Taper-radius cutoff: "
|
||||
"{}\n", swb));
|
||||
} else if ((swb < 5.0) && (control->me == 0))
|
||||
error->warning(FLERR,fmt::format("Very low Taper-radius cutoff: {}\n", swb));
|
||||
|
||||
d1 = swb - swa;
|
||||
d7 = pow(d1, 7.0);
|
||||
swa2 = SQR(swa);
|
||||
|
||||
@ -23,6 +23,7 @@
|
||||
#include "comm_kokkos.h" // IWYU pragma: export
|
||||
#include "comm_tiled_kokkos.h" // IWYU pragma: export
|
||||
#include "domain_kokkos.h" // IWYU pragma: export
|
||||
#include "group_kokkos.h" // IWYU pragma: export
|
||||
#include "kokkos.h" // IWYU pragma: export
|
||||
#include "memory_kokkos.h" // IWYU pragma: export
|
||||
#include "modify_kokkos.h" // IWYU pragma: export
|
||||
@ -39,6 +40,7 @@
|
||||
#include "comm_brick.h"
|
||||
#include "comm_tiled.h"
|
||||
#include "domain.h"
|
||||
#include "group.h"
|
||||
#include "memory.h"
|
||||
#include "modify.h"
|
||||
#include "neighbor.h"
|
||||
@ -86,6 +88,11 @@ class DomainKokkos : public Domain {
|
||||
DomainKokkos(class LAMMPS *lmp) : Domain(lmp) {}
|
||||
};
|
||||
|
||||
class GroupKokkos : public Group {
|
||||
public:
|
||||
GroupKokkos(class LAMMPS *lmp) : Group(lmp) {}
|
||||
};
|
||||
|
||||
class NeighborKokkos : public Neighbor {
|
||||
public:
|
||||
NeighborKokkos(class LAMMPS *lmp) : Neighbor(lmp) {}
|
||||
|
||||
@ -2507,7 +2507,8 @@ void CommTiled::deallocate_swap(int n)
|
||||
memory->destroy(sendbox_multi[i]);
|
||||
memory->destroy(sendbox_multiold[i]);
|
||||
|
||||
delete [] maxsendlist[i];
|
||||
if (maxsendlist)
|
||||
delete [] maxsendlist[i];
|
||||
|
||||
if (sendlist && sendlist[i]) {
|
||||
for (int j = 0; j < nprocmax[i]; j++) memory->destroy(sendlist[i][j]);
|
||||
|
||||
@ -153,7 +153,7 @@ class CommTiled : public Comm {
|
||||
virtual void grow_swap_send(int, int, int); // grow swap arrays for send and recv
|
||||
void grow_swap_send_multi(int, int); // grow multi swap arrays for send and recv
|
||||
void grow_swap_recv(int, int);
|
||||
void deallocate_swap(int); // deallocate swap arrays
|
||||
void deallocate_swap(int); // deallocate swap arrays
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
@ -264,6 +264,8 @@ class Fix : protected Pointers {
|
||||
|
||||
virtual double memory_usage() { return 0.0; }
|
||||
|
||||
void set_copymode(int value) { copymode = value; }
|
||||
|
||||
protected:
|
||||
int instance_me; // which Fix class instantiation I am
|
||||
|
||||
|
||||
@ -191,6 +191,8 @@ FixLangevin::FixLangevin(LAMMPS *lmp, int narg, char **arg) :
|
||||
|
||||
FixLangevin::~FixLangevin()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
delete random;
|
||||
delete[] tstr;
|
||||
delete[] gfactor1;
|
||||
@ -509,7 +511,7 @@ void FixLangevin::post_force(int /*vflag*/)
|
||||
else post_force_templated<1,0,0,0,0,0>();
|
||||
else
|
||||
if (gjfflag)
|
||||
if (tallyflag || osflag)
|
||||
if (tallyflag || osflag)
|
||||
if (tbiasflag == BIAS)
|
||||
if (rmass)
|
||||
if (zeroflag) post_force_templated<0,1,1,1,1,1>();
|
||||
|
||||
@ -872,7 +872,9 @@ void LAMMPS::create()
|
||||
else
|
||||
atom->create_avec("atomic",0,nullptr,1);
|
||||
|
||||
group = new Group(this);
|
||||
if (kokkos) group = new GroupKokkos(this);
|
||||
else group = new Group(this);
|
||||
|
||||
force = new Force(this); // must be after group, to create temperature
|
||||
|
||||
if (kokkos) modify = new ModifyKokkos(this);
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
---
|
||||
lammps_version: 28 Mar 2023
|
||||
date_generated: Fri Apr 7 18:04:29 2023
|
||||
tags: unstable
|
||||
epsilon: 7.5e-13
|
||||
skip_tests: single
|
||||
prerequisites: ! |
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
---
|
||||
lammps_version: 28 Mar 2023
|
||||
date_generated: Thu Mar 30 14:38:22 2023
|
||||
tags: unstable
|
||||
epsilon: 7.5e-13
|
||||
skip_tests: single
|
||||
prerequisites: ! |
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
---
|
||||
lammps_version: 28 Mar 2023
|
||||
date_generated: Fri Apr 7 18:07:13 2023
|
||||
tags: unstable
|
||||
epsilon: 7.5e-13
|
||||
skip_tests: single
|
||||
prerequisites: ! |
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
---
|
||||
lammps_version: 8 Feb 2023
|
||||
date_generated: Tue Mar 7 21:07:27 2023
|
||||
epsilon: 2.5e-13
|
||||
epsilon: 5.0e-13
|
||||
skip_tests: extract
|
||||
prerequisites: ! |
|
||||
atom full
|
||||
|
||||
@ -1,8 +1,7 @@
|
||||
---
|
||||
lammps_version: 7 Feb 2024
|
||||
tags:
|
||||
date_generated: Sat Apr 13 11:41:16 2024
|
||||
epsilon: 5.0e-11
|
||||
epsilon: 2.0e-10
|
||||
skip_tests:
|
||||
prerequisites: ! |
|
||||
atom full
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
---
|
||||
lammps_version: 29 Aug 2024
|
||||
date_generated: Tue Oct 1 12:45:25 2024
|
||||
epsilon: 2e-13
|
||||
epsilon: 1.0e-11
|
||||
skip_tests:
|
||||
prerequisites: ! |
|
||||
atom full
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
---
|
||||
lammps_version: 29 Aug 2024
|
||||
date_generated: Tue Oct 1 12:45:46 2024
|
||||
epsilon: 1e-12
|
||||
epsilon: 2.5e-11
|
||||
skip_tests:
|
||||
prerequisites: ! |
|
||||
atom full
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
---
|
||||
lammps_version: 17 Feb 2022
|
||||
date_generated: Thu Mar 17 19:43:17 2022
|
||||
epsilon: 2e-14
|
||||
epsilon: 5.0e-14
|
||||
skip_tests:
|
||||
prerequisites: ! |
|
||||
atom full
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
---
|
||||
lammps_version: 17 Feb 2022
|
||||
date_generated: Fri Mar 18 22:18:01 2022
|
||||
epsilon: 4e-14
|
||||
epsilon: 5.0e-14
|
||||
skip_tests:
|
||||
prerequisites: ! |
|
||||
atom full
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
---
|
||||
lammps_version: 8 Feb 2023
|
||||
date_generated: Thu Feb 23 00:40:51 2023
|
||||
epsilon: 4e-14
|
||||
epsilon: 5.0e-14
|
||||
skip_tests:
|
||||
prerequisites: ! |
|
||||
atom full
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
---
|
||||
lammps_version: 27 Jun 2024
|
||||
date_generated: Fri Aug 2 23:56:34 2024
|
||||
epsilon: 2e-14
|
||||
epsilon: 1.0e-13
|
||||
skip_tests:
|
||||
prerequisites: ! |
|
||||
atom full
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
---
|
||||
lammps_version: 8 Feb 2023
|
||||
date_generated: Thu Feb 23 15:26:55 2023
|
||||
epsilon: 4e-14
|
||||
epsilon: 1.0e-13
|
||||
skip_tests:
|
||||
prerequisites: ! |
|
||||
atom full
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
---
|
||||
lammps_version: 8 Feb 2023
|
||||
date_generated: Thu Feb 23 00:56:30 2023
|
||||
epsilon: 4e-14
|
||||
epsilon: 2.0e-13
|
||||
skip_tests:
|
||||
prerequisites: ! |
|
||||
atom full
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
---
|
||||
lammps_version: 8 Feb 2023
|
||||
date_generated: Thu Feb 23 00:56:30 2023
|
||||
epsilon: 4e-14
|
||||
epsilon: 2.0e-13
|
||||
skip_tests:
|
||||
prerequisites: ! |
|
||||
atom full
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
---
|
||||
lammps_version: 17 Feb 2022
|
||||
date_generated: Fri Mar 18 22:17:48 2022
|
||||
epsilon: 7.5e-09
|
||||
epsilon: 1.5e-08
|
||||
skip_tests:
|
||||
prerequisites: ! |
|
||||
pair pace
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
---
|
||||
lammps_version: 10 Mar 2021
|
||||
date_generated: Wed Apr 7 19:30:07 2021
|
||||
epsilon: 7.5e-09
|
||||
epsilon: 1.5e-08
|
||||
prerequisites: ! |
|
||||
pair pace
|
||||
pre_commands: ! |
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
---
|
||||
lammps_version: 21 Nov 2023
|
||||
date_generated: Thu Jan 18 11:01:50 2024
|
||||
epsilon: 5e-14
|
||||
epsilon: 1e-13
|
||||
skip_tests: intel
|
||||
prerequisites: ! |
|
||||
atom full
|
||||
|
||||
Reference in New Issue
Block a user